nat: fix nat44-ed port range with multiple workers 62/36962/2
authorVladislav Grishenko <themiron@yandex-team.ru>
Fri, 19 Aug 2022 15:42:22 +0000 (20:42 +0500)
committerBenoît Ganne <bganne@cisco.com>
Thu, 15 Sep 2022 08:39:19 +0000 (08:39 +0000)
The number of available dynamic ports is set to (0xffff - 1024) =
64511, which is not divisible by a power-of-two number of workers - its
only non-trivial integer divisors are 31 and 2081.
So the total dynamic port range across all workers will be smaller than that:
    1 wrk: n = (port_per_thread = 64511/1)*1 = 64511 + 1025 = 65536
    2 wrk: n = (port_per_thread = 64511/2)*2 = 64510 + 1025 = 65535
    4 wrk: n = (port_per_thread = 64511/4)*4 = 64508 + 1025 = 65533
    8 wrk: n = (port_per_thread = 64511/8)*8 = 64504 + 1025 = 65529
    ...
As seen, with multiple workers there are unused trailing ports for every
NAT pool address, and that is the reason for the out-of-bounds index into
the worker array on the out2in path, due to the
(port - 1024) / port_per_thread math.
This was fixed in 5c9f9968de63fa627b4a72b344df36cdc686d18a, so packets
to unused ports will go to an existing worker and be dropped there.

Per RFC 6335 https://www.rfc-editor.org/rfc/rfc6335#section-6:
6.  Port Number Ranges
   o  the System Ports, also known as the Well Known Ports, from 0-1023
      (assigned by IANA)
   o  the User Ports, also known as the Registered Ports, from 1024-
      49151 (assigned by IANA)
   o  the Dynamic Ports, also known as the Private or Ephemeral Ports,
      from 49152-65535 (never assigned)

Accordingly, let's allocate dynamic ports starting from 1024 and get the
full port range for a wide range of worker counts - 64 integer divisors in
total, including the power-of-two ones:
    1 wrk: n = (port_per_thread = 64512/1)*1 = 64512 + 1024 = 65536
    2 wrk: n = (port_per_thread = 64512/2)*2 = 64512 + 1024 = 65536
    3 wrk: n = (port_per_thread = 64512/3)*3 = 64512 + 1024 = 65536
    4 wrk: n = (port_per_thread = 64512/4)*4 = 64512 + 1024 = 65536
    5 wrk: n = (port_per_thread = 64512/5)*5 = 64510 + 1024 = 65534
    6 wrk: n = (port_per_thread = 64512/6)*6 = 64512 + 1024 = 65536
    7 wrk: n = (port_per_thread = 64512/7)*7 = 64512 + 1024 = 65536
    8 wrk: n = (port_per_thread = 64512/8)*8 = 64512 + 1024 = 65536
    ...
The modulo from 5c9f9968de63fa627b4a72b344df36cdc686d18a is still required
when the number of workers is not an integer divisor of 64512.

Type: fix
Fixes: 5c9f9968de63fa627b4a72b344df36cdc686d18a
Change-Id: I9edaea07e58ff4888812b0d86cbf41a3784b189e
Signed-off-by: Vladislav Grishenko <themiron@yandex-team.ru>
src/plugins/nat/nat44-ed/nat44_ed.c
src/plugins/nat/nat44-ed/nat44_ed.h
src/plugins/nat/nat44-ed/nat44_ed_in2out.c
src/plugins/nat/nat44-ed/nat44_ed_out2in.c
test/test_nat44_ed.py

index 27c1870..2e4c791 100644 (file)
@@ -762,8 +762,8 @@ get_thread_idx_by_port (u16 e_port)
   if (sm->num_workers > 1)
     {
       thread_idx = sm->first_worker_index +
-                  sm->workers[(e_port - 1024) / sm->port_per_thread %
-                              _vec_len (sm->workers)];
+                  sm->workers[(e_port - ED_USER_PORT_OFFSET) /
+                              sm->port_per_thread % _vec_len (sm->workers)];
     }
   return thread_idx;
 }
@@ -2133,7 +2133,7 @@ snat_set_workers (uword * bitmap)
       j++;
     }
 
-  sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
+  sm->port_per_thread = (65536 - ED_USER_PORT_OFFSET) / _vec_len (sm->workers);
 
   return 0;
 }
@@ -2384,7 +2384,7 @@ nat_init (vlib_main_t * vm)
        }
     }
   num_threads = tm->n_vlib_mains - 1;
-  sm->port_per_thread = 0xffff - 1024;
+  sm->port_per_thread = 65536 - ED_USER_PORT_OFFSET;
   vec_validate (sm->per_thread_data, num_threads);
 
   /* Use all available workers by default */
index 5b5b2ec..6c64c75 100644 (file)
  * as if there were no free ports available to conserve resources */
 #define ED_PORT_ALLOC_ATTEMPTS (10)
 
+/* system ports range is 0-1023, first user port is 1024 per
+ * https://www.rfc-editor.org/rfc/rfc6335#section-6
+ */
+#define ED_USER_PORT_OFFSET 1024
+
 /* NAT buffer flags */
 #define SNAT_FLAG_HAIRPINNING (1 << 0)
 
index 2d8d96a..61ce70f 100644 (file)
@@ -102,14 +102,15 @@ nat_ed_alloc_addr_and_port_with_snat_address (
   u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
   ip4_address_t *outside_addr, u16 *outside_port)
 {
-  const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
+  const u16 port_thread_offset =
+    (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
 
   s->o2i.match.daddr = a->addr;
   /* first try port suggested by caller */
   u16 port = clib_net_to_host_u16 (*outside_port);
   u16 port_offset = port - port_thread_offset;
-  if (port <= port_thread_offset ||
-      port > port_thread_offset + port_per_thread)
+  if (port < port_thread_offset ||
+      port >= port_thread_offset + port_per_thread)
     {
       /* need to pick a different port, suggested port doesn't fit in
        * this thread's port range */
index 3344912..dfe4a15 100644 (file)
@@ -279,7 +279,8 @@ nat44_ed_alloc_i2o_port (snat_main_t *sm, snat_address_t *a, snat_session_t *s,
   for (int i = 0; i < ED_PORT_ALLOC_ATTEMPTS; ++i)
     {
       portnum = (sm->port_per_thread * snat_thread_index) +
-               snat_random_port (0, sm->port_per_thread - 1) + 1024;
+               snat_random_port (0, sm->port_per_thread - 1) +
+               ED_USER_PORT_OFFSET;
       portnum = clib_host_to_net_u16 (portnum);
       nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, a->addr,
                            portnum, i2o_fib_index, proto);
index 7745987..d90afd2 100644 (file)
@@ -71,7 +71,7 @@ class TestNAT44ED(VppTestCase):
 
     @staticmethod
     def random_port():
-        return randint(1025, 65535)
+        return randint(1024, 65535)
 
     @staticmethod
     def proto2layer(proto):
@@ -2358,6 +2358,197 @@ class TestNAT44ED(VppTestCase):
                 % (p_sent[IP].src, p_recvd[IP].src, a),
             )
 
+    def test_dynamic_edge_ports(self):
+        """NAT44ED dynamic translation test: edge ports"""
+
+        worker_count = self.vpp_worker_count or 1
+        port_offset = 1024
+        port_per_thread = (65536 - port_offset) // worker_count
+        port_count = port_per_thread * worker_count
+
+        # worker thread edge ports
+        thread_edge_ports = {0, port_offset - 1, 65535}
+        for i in range(0, worker_count):
+            port_thread_offset = (port_per_thread * i) + port_offset
+            for port_range_offset in [0, port_per_thread - 1]:
+                port = port_thread_offset + port_range_offset
+                thread_edge_ports.add(port)
+        thread_drop_ports = set(
+            filter(
+                lambda x: x not in range(port_offset, port_offset + port_count),
+                thread_edge_ports,
+            )
+        )
+
+        in_if = self.pg7
+        out_if = self.pg8
+
+        self.nat_add_address(self.nat_addr)
+
+        try:
+            self.configure_ip4_interface(in_if, hosts=worker_count)
+            self.configure_ip4_interface(out_if)
+
+            self.nat_add_inside_interface(in_if)
+            self.nat_add_outside_interface(out_if)
+
+            # in2out
+            tc1 = self.statistics["/nat44-ed/in2out/slowpath/tcp"]
+            uc1 = self.statistics["/nat44-ed/in2out/slowpath/udp"]
+            ic1 = self.statistics["/nat44-ed/in2out/slowpath/icmp"]
+            dc1 = self.statistics["/nat44-ed/in2out/slowpath/drops"]
+
+            pkt_count = worker_count * len(thread_edge_ports)
+
+            i2o_pkts = [[] for x in range(0, worker_count)]
+            for i in range(0, worker_count):
+                remote_host = in_if.remote_hosts[i]
+                for port in thread_edge_ports:
+                    p = (
+                        Ether(dst=in_if.local_mac, src=in_if.remote_mac)
+                        / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
+                        / TCP(sport=port, dport=port)
+                    )
+                    i2o_pkts[i].append(p)
+
+                    p = (
+                        Ether(dst=in_if.local_mac, src=in_if.remote_mac)
+                        / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
+                        / UDP(sport=port, dport=port)
+                    )
+                    i2o_pkts[i].append(p)
+
+                    p = (
+                        Ether(dst=in_if.local_mac, src=in_if.remote_mac)
+                        / IP(src=remote_host.ip4, dst=out_if.remote_ip4)
+                        / ICMP(id=port, seq=port, type="echo-request")
+                    )
+                    i2o_pkts[i].append(p)
+
+            for i in range(0, worker_count):
+                if len(i2o_pkts[i]) > 0:
+                    in_if.add_stream(i2o_pkts[i], worker=i)
+
+            self.pg_enable_capture(self.pg_interfaces)
+            self.pg_start()
+            capture = out_if.get_capture(pkt_count * 3)
+            for packet in capture:
+                self.assert_packet_checksums_valid(packet)
+                if packet.haslayer(TCP):
+                    self.assert_in_range(
+                        packet[TCP].sport,
+                        port_offset,
+                        port_offset + port_count,
+                        "src TCP port",
+                    )
+                elif packet.haslayer(UDP):
+                    self.assert_in_range(
+                        packet[UDP].sport,
+                        port_offset,
+                        port_offset + port_count,
+                        "src UDP port",
+                    )
+                elif packet.haslayer(ICMP):
+                    self.assert_in_range(
+                        packet[ICMP].id,
+                        port_offset,
+                        port_offset + port_count,
+                        "ICMP id",
+                    )
+                else:
+                    self.fail(
+                        ppp("Unexpected or invalid packet (outside network):", packet)
+                    )
+
+            if_idx = in_if.sw_if_index
+            tc2 = self.statistics["/nat44-ed/in2out/slowpath/tcp"]
+            uc2 = self.statistics["/nat44-ed/in2out/slowpath/udp"]
+            ic2 = self.statistics["/nat44-ed/in2out/slowpath/icmp"]
+            dc2 = self.statistics["/nat44-ed/in2out/slowpath/drops"]
+
+            self.assertEqual(tc2[:, if_idx].sum() - tc1[:, if_idx].sum(), pkt_count)
+            self.assertEqual(uc2[:, if_idx].sum() - uc1[:, if_idx].sum(), pkt_count)
+            self.assertEqual(ic2[:, if_idx].sum() - ic1[:, if_idx].sum(), pkt_count)
+            self.assertEqual(dc2[:, if_idx].sum() - dc1[:, if_idx].sum(), 0)
+
+            # out2in
+            tc1 = self.statistics["/nat44-ed/out2in/fastpath/tcp"]
+            uc1 = self.statistics["/nat44-ed/out2in/fastpath/udp"]
+            ic1 = self.statistics["/nat44-ed/out2in/fastpath/icmp"]
+            dc1 = self.statistics["/nat44-ed/out2in/fastpath/drops"]
+            dc3 = self.statistics["/nat44-ed/out2in/slowpath/drops"]
+
+            # replies to unchanged thread ports should pass on each worker,
+            # excluding packets outside dynamic port range
+            drop_count = worker_count * len(thread_drop_ports)
+            pass_count = worker_count * len(thread_edge_ports) - drop_count
+
+            o2i_pkts = [[] for x in range(0, worker_count)]
+            for i in range(0, worker_count):
+                for port in thread_edge_ports:
+                    p = (
+                        Ether(dst=out_if.local_mac, src=out_if.remote_mac)
+                        / IP(src=out_if.remote_ip4, dst=self.nat_addr)
+                        / TCP(sport=port, dport=port)
+                    )
+                    o2i_pkts[i].append(p)
+
+                    p = (
+                        Ether(dst=out_if.local_mac, src=out_if.remote_mac)
+                        / IP(src=out_if.remote_ip4, dst=self.nat_addr)
+                        / UDP(sport=port, dport=port)
+                    )
+                    o2i_pkts[i].append(p)
+
+                    p = (
+                        Ether(dst=out_if.local_mac, src=out_if.remote_mac)
+                        / IP(src=out_if.remote_ip4, dst=self.nat_addr)
+                        / ICMP(id=port, seq=port, type="echo-reply")
+                    )
+                    o2i_pkts[i].append(p)
+
+            for i in range(0, worker_count):
+                if len(o2i_pkts[i]) > 0:
+                    out_if.add_stream(o2i_pkts[i], worker=i)
+
+            self.pg_enable_capture(self.pg_interfaces)
+            self.pg_start()
+            capture = in_if.get_capture(pass_count * 3)
+            for packet in capture:
+                self.assert_packet_checksums_valid(packet)
+                if packet.haslayer(TCP):
+                    self.assertIn(packet[TCP].dport, thread_edge_ports, "dst TCP port")
+                    self.assertEqual(packet[TCP].dport, packet[TCP].sport, "TCP ports")
+                elif packet.haslayer(UDP):
+                    self.assertIn(packet[UDP].dport, thread_edge_ports, "dst UDP port")
+                    self.assertEqual(packet[UDP].dport, packet[UDP].sport, "UDP ports")
+                elif packet.haslayer(ICMP):
+                    self.assertIn(packet[ICMP].id, thread_edge_ports, "ICMP id")
+                    self.assertEqual(packet[ICMP].id, packet[ICMP].seq, "ICMP id & seq")
+                else:
+                    self.fail(
+                        ppp("Unexpected or invalid packet (inside network):", packet)
+                    )
+
+            if_idx = out_if.sw_if_index
+            tc2 = self.statistics["/nat44-ed/out2in/fastpath/tcp"]
+            uc2 = self.statistics["/nat44-ed/out2in/fastpath/udp"]
+            ic2 = self.statistics["/nat44-ed/out2in/fastpath/icmp"]
+            dc2 = self.statistics["/nat44-ed/out2in/fastpath/drops"]
+            dc4 = self.statistics["/nat44-ed/out2in/slowpath/drops"]
+
+            self.assertEqual(tc2[:, if_idx].sum() - tc1[:, if_idx].sum(), pass_count)
+            self.assertEqual(uc2[:, if_idx].sum() - uc1[:, if_idx].sum(), pass_count)
+            self.assertEqual(ic2[:, if_idx].sum() - ic1[:, if_idx].sum(), pass_count)
+            self.assertEqual(dc2[:, if_idx].sum() - dc1[:, if_idx].sum(), 0)
+            self.assertEqual(
+                dc4[:, if_idx].sum() - dc3[:, if_idx].sum(), drop_count * 3
+            )
+
+        finally:
+            in_if.unconfig()
+            out_if.unconfig()
+
 
 class TestNAT44EDMW(TestNAT44ED):
     """NAT44ED MW Test Case"""