SNAT: IP fragmentation (VPP-890) 56/8056/4
authorMatus Fabian <matfabia@cisco.com>
Tue, 15 Aug 2017 13:59:19 +0000 (06:59 -0700)
committerOle Trøan <otroan@employees.org>
Tue, 7 Nov 2017 21:58:31 +0000 (21:58 +0000)
Translation of fragmented packets.

Change-Id: I9b1f2e9433ce273638080f32c2d3bff39c49899d
Signed-off-by: Matus Fabian <matfabia@cisco.com>
15 files changed:
src/plugins/nat.am
src/plugins/nat/in2out.c
src/plugins/nat/nat.api
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat64_db.c
src/plugins/nat/nat64_db.h
src/plugins/nat/nat64_in2out.c
src/plugins/nat/nat64_out2in.c
src/plugins/nat/nat_api.c
src/plugins/nat/nat_reass.c [new file with mode: 0644]
src/plugins/nat/nat_reass.h [new file with mode: 0644]
src/plugins/nat/out2in.c
test/test_nat.py
test/vpp_papi_provider.py

index add82f0..b6c369f 100644 (file)
@@ -22,6 +22,7 @@ nat_plugin_la_SOURCES = nat/nat.c             \
        nat/nat_plugin.api.h                    \
         nat/nat_ipfix_logging.c                        \
         nat/nat_det.c                          \
        nat/nat_plugin.api.h                    \
         nat/nat_ipfix_logging.c                        \
         nat/nat_det.c                          \
+        nat/nat_reass.c                        \
         nat/nat64.c                            \
         nat/nat64_cli.c                        \
         nat/nat64_in2out.c                     \
         nat/nat64.c                            \
         nat/nat64_cli.c                        \
         nat/nat64_in2out.c                     \
index b059390..e4dbe91 100755 (executable)
@@ -24,6 +24,7 @@
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
+#include <nat/nat_reass.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -83,6 +84,25 @@ static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
   return s;
 }
 
   return s;
 }
 
+typedef struct {
+  u32 sw_if_index;
+  u32 next_index;
+  u8 cached;
+} nat44_in2out_reass_trace_t;
+
+static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
+
+  s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
+              t->sw_if_index, t->next_index,
+              t->cached ? "cached" : "translated");
+
+  return s;
+}
+
 vlib_node_registration_t snat_in2out_node;
 vlib_node_registration_t snat_in2out_slowpath_node;
 vlib_node_registration_t snat_in2out_fast_node;
 vlib_node_registration_t snat_in2out_node;
 vlib_node_registration_t snat_in2out_slowpath_node;
 vlib_node_registration_t snat_in2out_fast_node;
@@ -94,6 +114,7 @@ vlib_node_registration_t snat_in2out_output_worker_handoff_node;
 vlib_node_registration_t snat_hairpin_dst_node;
 vlib_node_registration_t snat_hairpin_src_node;
 vlib_node_registration_t nat44_hairpinning_node;
 vlib_node_registration_t snat_hairpin_dst_node;
 vlib_node_registration_t snat_hairpin_src_node;
 vlib_node_registration_t nat44_hairpinning_node;
+vlib_node_registration_t nat44_in2out_reass_node;
 
 
 #define foreach_snat_in2out_error                       \
 
 
 #define foreach_snat_in2out_error                       \
@@ -103,7 +124,10 @@ _(OUT_OF_PORTS, "Out of ports")                         \
 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
-_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "Drop fragment")                       \
+_(MAX_REASS, "Maximum reassemblies exceeded")           \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 typedef enum {
 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
 
 typedef enum {
 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
@@ -123,6 +147,7 @@ typedef enum {
   SNAT_IN2OUT_NEXT_DROP,
   SNAT_IN2OUT_NEXT_ICMP_ERROR,
   SNAT_IN2OUT_NEXT_SLOW_PATH,
   SNAT_IN2OUT_NEXT_DROP,
   SNAT_IN2OUT_NEXT_ICMP_ERROR,
   SNAT_IN2OUT_NEXT_SLOW_PATH,
+  SNAT_IN2OUT_NEXT_REASS,
   SNAT_IN2OUT_N_NEXT,
 } snat_in2out_next_t;
 
   SNAT_IN2OUT_N_NEXT,
 } snat_in2out_next_t;
 
@@ -243,6 +268,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   u32 address_index = ~0;
   u32 outside_fib_index;
   uword * p;
   u32 address_index = ~0;
   u32 outside_fib_index;
   uword * p;
+  udp_header_t * udp0 = ip4_next_header (ip0);
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
@@ -443,6 +469,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   s->out2in.protocol = key0->protocol;
   s->out2in.fib_index = outside_fib_index;
   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
   s->out2in.protocol = key0->protocol;
   s->out2in.fib_index = outside_fib_index;
   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+  s->ext_host_port = udp0->dst_port;
   *sessionp = s;
 
   /* Add to translation hashes */
   *sessionp = s;
 
   /* Add to translation hashes */
@@ -1645,6 +1672,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace00;
                 }
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace00;
                 }
+
+              if (ip4_is_fragment (ip0))
+                {
+                  next0 = SNAT_IN2OUT_NEXT_REASS;
+                  goto trace00;
+                }
             }
 
           key0.addr = ip0->src_address;
             }
 
           key0.addr = ip0->src_address;
@@ -1819,6 +1852,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace01;
                 }
                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace01;
                 }
+
+              /* Fragment in the second slot of the dual loop: steer b1 to
+               * the reassembly node.  The decision belongs to next1;
+               * writing next0 here would override the first packet's
+               * next node and leave this fragment on its default path. */
+              if (ip4_is_fragment (ip1))
+                {
+                  next1 = SNAT_IN2OUT_NEXT_REASS;
+                  goto trace01;
+                }
             }
 
           b1->flags |= VNET_BUFFER_F_IS_NATED;
             }
 
           b1->flags |= VNET_BUFFER_F_IS_NATED;
@@ -2029,6 +2068,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace0;
                 }
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace0;
                 }
+
+              if (ip4_is_fragment (ip0))
+                {
+                  next0 = SNAT_IN2OUT_NEXT_REASS;
+                  goto trace0;
+                }
             }
 
           key0.addr = ip0->src_address;
             }
 
           key0.addr = ip0->src_address;
@@ -2194,6 +2239,7 @@ VLIB_REGISTER_NODE (snat_in2out_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
   },
 };
 
@@ -2227,6 +2273,7 @@ VLIB_REGISTER_NODE (snat_in2out_output_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
   },
 };
 
@@ -2261,6 +2308,7 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
   },
 };
 
@@ -2295,6 +2343,7 @@ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
   },
 };
 
@@ -2392,6 +2441,371 @@ VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
                               nat44_hairpinning_fn);
 
 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
                               nat44_hairpinning_fn);
 
+/*
+ * Hairpinning for packets handled by the NAT44 in2out reassembly node.
+ *
+ * Looks up the destination (ip0->dst_address, dport, proto0) first in the
+ * static mappings and then in the out2in session table.  When a match is
+ * found the destination is behind the same NAT, so the destination address
+ * (and, in the first fragment only, the destination port) is rewritten to
+ * the inside address/port and the TX fib index is updated.
+ *
+ * sm     - NAT main structure
+ * b0     - buffer being processed
+ * ip0    - IPv4 header of the buffer
+ * sport  - source port (not used by this function)
+ * dport  - destination port, supplied by the caller (the node passes the
+ *          port stored in the session) because non-first fragments carry
+ *          no L4 header to read it from
+ * proto0 - SNAT protocol of the packet
+ */
+static inline void
+nat44_reass_hairpinning (snat_main_t *sm,
+                         vlib_buffer_t * b0,
+                         ip4_header_t * ip0,
+                         u16 sport,
+                         u16 dport,
+                         u32 proto0)
+{
+  snat_session_key_t key0, sm0;
+  snat_session_t * s0;
+  clib_bihash_kv_8_8_t kv0, value0;
+  ip_csum_t sum0;
+  u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
+  u16 new_dst_port0 = 0, old_dst_port0;
+  udp_header_t * udp0;
+  tcp_header_t * tcp0;
+
+  key0.addr = ip0->dst_address;
+  key0.port = dport;
+  key0.protocol = proto0;
+  key0.fib_index = sm->outside_fib_index;
+  kv0.key = key0.as_u64;
+
+  udp0 = ip4_next_header (ip0);
+
+  /* Check if destination is static mappings */
+  if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+    {
+      new_dst_addr0 = sm0.addr.as_u32;
+      new_dst_port0 = sm0.port;
+      vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+    }
+  /* or active sessions */
+  else
+    {
+      /* Derive the owning worker from the caller-supplied port, i.e. the
+       * same port used in the lookup key.  The bytes behind the IP header
+       * are an L4 header only in the first fragment, so reading
+       * udp0->dst_port here would pick a thread from payload data. */
+      if (sm->num_workers > 1)
+        ti = (clib_net_to_host_u16 (dport) - 1024) / sm->port_per_thread;
+      else
+        ti = sm->num_workers;
+
+      if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
+        {
+          si = value0.value;
+          s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+          new_dst_addr0 = s0->in2out.addr.as_u32;
+          new_dst_port0 = s0->in2out.port;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+        }
+    }
+
+  /* Destination is behind the same NAT, use internal address and port */
+  if (new_dst_addr0)
+    {
+      old_dst_addr0 = ip0->dst_address.as_u32;
+      ip0->dst_address.as_u32 = new_dst_addr0;
+      sum0 = ip0->checksum;
+      sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                             ip4_header_t, dst_address);
+      ip0->checksum = ip_csum_fold (sum0);
+
+      old_dst_port0 = dport;
+
+      /* The L4 header exists only in the first fragment.  In any later
+       * fragment the bytes after the IP header are payload and must be
+       * left untouched, so all port/L4-checksum rewriting is confined to
+       * the first fragment. */
+      if (ip4_is_first_fragment (ip0))
+        {
+          if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+            {
+              tcp0 = ip4_next_header (ip0);
+              sum0 = tcp0->checksum;
+              /* The address change always affects the TCP checksum */
+              sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                                     ip4_header_t, dst_address);
+              if (new_dst_port0 != old_dst_port0)
+                {
+                  tcp0->dst = new_dst_port0;
+                  sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+                                         ip4_header_t /* cheat */, length);
+                }
+              tcp0->checksum = ip_csum_fold(sum0);
+            }
+          else if (new_dst_port0 != old_dst_port0)
+            {
+              udp0->dst_port = new_dst_port0;
+              udp0->checksum = 0;
+            }
+        }
+    }
+}
+
+static uword
+nat44_in2out_reass_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  snat_in2out_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t *sm = &snat_main;
+  f64 now = vlib_time_now (vm);
+  u32 thread_index = vlib_get_thread_index ();
+  snat_main_per_thread_data_t *per_thread_data =
+    &sm->per_thread_data[thread_index];
+  u32 *fragments_to_drop = 0;
+  u32 *fragments_to_loopback = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
+         vlib_buffer_t *b0;
+          u32 next0;
+          u8 cached0 = 0;
+          ip4_header_t *ip0;
+          nat_reass_ip4_t *reass0;
+          udp_header_t * udp0;
+          tcp_header_t * tcp0;
+          snat_session_key_t key0;
+          clib_bihash_kv_8_8_t kv0, value0;
+          snat_session_t * s0 = 0;
+          u16 old_port0, new_port0;
+          ip_csum_t sum0;
+
+          /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                               sw_if_index0);
+
+          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
+            {
+              next0 = SNAT_IN2OUT_NEXT_DROP;
+              b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
+              goto trace0;
+            }
+
+          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
+          udp0 = ip4_next_header (ip0);
+          tcp0 = (tcp_header_t *) udp0;
+          proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
+                                                 ip0->dst_address,
+                                                 ip0->fragment_id,
+                                                 ip0->protocol,
+                                                 1,
+                                                 &fragments_to_drop);
+
+          if (PREDICT_FALSE (!reass0))
+            {
+              next0 = SNAT_IN2OUT_NEXT_DROP;
+              b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
+              goto trace0;
+            }
+
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              key0.addr = ip0->src_address;
+              key0.port = udp0->src_port;
+              key0.protocol = proto0;
+              key0.fib_index = rx_fib_index0;
+              kv0.key = key0.as_u64;
+
+              if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
+                {
+                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+                      ip0, proto0, rx_fib_index0, thread_index)))
+                    goto trace0;
+
+                  next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+                                     &s0, node, next0, thread_index);
+
+                  if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+                    goto trace0;
+
+                  reass0->sess_index = s0 - per_thread_data->sessions;
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (per_thread_data->sessions,
+                                          value0.value);
+                  reass0->sess_index = value0.value;
+                }
+              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
+            }
+          else
+            {
+              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
+                {
+                  if (nat_ip4_reass_add_fragment (reass0, bi0))
+                    {
+                      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
+                      next0 = SNAT_IN2OUT_NEXT_DROP;
+                      goto trace0;
+                    }
+                  cached0 = 1;
+                  goto trace0;
+                }
+              s0 = pool_elt_at_index (per_thread_data->sessions,
+                                      reass0->sess_index);
+            }
+
+          old_addr0 = ip0->src_address.as_u32;
+          ip0->src_address = s0->out2in.addr;
+          new_addr0 = ip0->src_address.as_u32;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                 ip4_header_t,
+                                 src_address /* changed member */);
+          ip0->checksum = ip_csum_fold (sum0);
+
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+                {
+                  old_port0 = tcp0->src_port;
+                  tcp0->src_port = s0->out2in.port;
+                  new_port0 = tcp0->src_port;
+
+                  sum0 = tcp0->checksum;
+                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                         ip4_header_t,
+                                         dst_address /* changed member */);
+                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
+                                         ip4_header_t /* cheat */,
+                                         length /* changed member */);
+                  tcp0->checksum = ip_csum_fold(sum0);
+                }
+              else
+                {
+                  old_port0 = udp0->src_port;
+                  udp0->src_port = s0->out2in.port;
+                  udp0->checksum = 0;
+                }
+            }
+
+          /* Hairpinning */
+          nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
+                                   s0->ext_host_port, proto0);
+
+          /* Accounting */
+          s0->last_heard = now;
+          s0->total_pkts++;
+          s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+          /* Per-user LRU list maintenance for dynamic translation */
+          if (!snat_is_session_static (s0))
+            {
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+                                 s0->per_user_index);
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+                                  s0->per_user_list_head_index,
+                                  s0->per_user_index);
+            }
+
+        trace0:
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat44_in2out_reass_trace_t *t =
+                 vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->cached = cached0;
+              t->sw_if_index = sw_if_index0;
+              t->next_index = next0;
+            }
+
+          if (cached0)
+            {
+              n_left_to_next++;
+              to_next--;
+            }
+          else
+            {
+              pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+              /* verify speculative enqueue, maybe switch current next frame */
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               bi0, next0);
+            }
+
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
+                               sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
+                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+                               pkts_processed);
+
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                        &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
+                        SNAT_IN2OUT_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
+  .function = nat44_in2out_reass_node_fn,
+  .name = "nat44-in2out-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat44_in2out_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+
+  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+  .next_nodes = {
+    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
+    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
+                              nat44_in2out_reass_node_fn);
+
 /**************************/
 /*** deterministic mode ***/
 /**************************/
 /**************************/
 /*** deterministic mode ***/
 /**************************/
@@ -3771,6 +4185,7 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
   },
 };
 
index 187de25..d8fdf72 100644 (file)
@@ -760,6 +760,87 @@ autoreply define nat_ipfix_enable_disable {
   u8 enable;
 };
 
   u8 enable;
 };
 
+/** \brief Set NAT virtual fragmentation reassembly
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param timeout - reassembly timeout
+    @param max_reass - maximum number of concurrent reassemblies
+    @param max_frag - maximum number of fragments per reassembly
+    @param drop_frag - if 0 translate fragments, otherwise drop fragments
+    @param is_ip6 - 1 if IPv6, 0 if IPv4
+*/
+autoreply define nat_set_reass {
+  u32 client_index;
+  u32 context;
+  u32 timeout;
+  u16 max_reass;
+  u8  max_frag;
+  u8  drop_frag;
+  u8  is_ip6;
+};
+
+/** \brief Get NAT virtual fragmentation reassembly configuration
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_get_reass {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Get NAT virtual fragmentation reassembly configuration reply
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param ip4_timeout - reassembly timeout
+    @param ip4_max_reass - maximum number of concurrent reassemblies
+    @param ip4_max_frag - maximum number of fragments per reassembly
+    @param ip4_drop_frag - if 0 translate fragments, otherwise drop fragments
+    @param ip6_timeout - reassembly timeout
+    @param ip6_max_reass - maximum number of concurrent reassemblies
+    @param ip6_max_frag - maximum number of fragments per reassembly
+    @param ip6_drop_frag - if 0 translate fragments, otherwise drop fragments
+*/
+define nat_get_reass_reply {
+  u32 context;
+  i32 retval;
+  u32 ip4_timeout;
+  u16 ip4_max_reass;
+  u8  ip4_max_frag;
+  u8  ip4_drop_frag;
+  u32 ip6_timeout;
+  u16 ip6_max_reass;
+  u8  ip6_max_frag;
+  u8  ip6_drop_frag;
+};
+
+/** \brief Dump NAT virtual fragmentation reassemblies
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_reass_dump {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief NAT virtual fragmentation reassemblies response
+    @param context - sender context, to match reply w/ request
+    @param is_ip4 - 1 if address type is IPv4
+    @param src_addr - source IP address
+    @param dst_addr - destination IP address
+    @param frag_id - fragment ID
+    @param proto - protocol
+    @param frag_n - number of cached fragments
+*/
+define nat_reass_details {
+  u32 context;
+  u8 is_ip4;
+  u8 src_addr[16];
+  u8 dst_addr[16];
+  u32 frag_id;
+  u8 proto;
+  u8 frag_n;
+};
+
 /*
  * NAT44 APIs
  */
 /*
  * NAT44 APIs
  */
index cd5a6eb..7e651e5 100644 (file)
@@ -24,6 +24,7 @@
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
+#include <nat/nat_reass.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -1447,11 +1448,15 @@ static clib_error_t * snat_init (vlib_main_t * vm)
   /* Init IPFIX logging */
   snat_ipfix_logging_init(vm);
 
   /* Init IPFIX logging */
   snat_ipfix_logging_init(vm);
 
+  /* Init NAT64 */
   error = nat64_init(vm);
   error = nat64_init(vm);
+  if (error)
+    return error;
 
   dslite_init(vm);
 
 
   dslite_init(vm);
 
-  return error;
+  /* Init virtual fragmentation reassembly */
+  return nat_reass_init(vm);
 }
 
 VLIB_INIT_FUNCTION (snat_init);
 }
 
 VLIB_INIT_FUNCTION (snat_init);
@@ -2889,6 +2894,7 @@ show_snat_command_fn (vlib_main_t * vm,
             }
         }
     }
             }
         }
     }
+
   return 0;
 }
 
   return 0;
 }
 
index b72e075..5bd0a11 100644 (file)
@@ -154,9 +154,9 @@ typedef CLIB_PACKED(struct {
   /* Outside address */
   u32 outside_address_index;    /* 64-67 */
 
   /* Outside address */
   u32 outside_address_index;    /* 64-67 */
 
-  /* External host address */
+  /* External host address and port */
   ip4_address_t ext_host_addr;  /* 68-71 */
   ip4_address_t ext_host_addr;  /* 68-71 */
-
+  u16 ext_host_port;            /* 72-73 */
 }) snat_session_t;
 
 
 }) snat_session_t;
 
 
@@ -563,4 +563,30 @@ maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index)
   return 0;
 }
 
   return 0;
 }
 
-#endif /* __included_nat_h__ */
+static_always_inline void
+nat_send_all_to_node(vlib_main_t *vm, u32 *bi_vector,
+                     vlib_node_runtime_t *node, vlib_error_t *error, u32 next)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+
+  from = bi_vector;
+  n_left_from = vec_len(bi_vector);
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 bi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next += 1;
+      n_left_to_next -= 1;
+      vlib_buffer_t *p0 = vlib_get_buffer(vm, bi0);
+      p0->error = *error;
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                      n_left_to_next, bi0, next);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+}
+
+#endif /* __included_nat_h__ */
index da73cee..008a137 100644 (file)
@@ -529,6 +529,52 @@ nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr,
   return ste;
 }
 
   return ste;
 }
 
+u32
+nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste)
+{
+  nat64_db_st_entry_t *st;
+
+  switch (ip_proto_to_snat_proto (ste->proto))
+    {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+    case SNAT_PROTOCOL_##N: \
+      st = db->st._##n##_st; \
+      break;
+      foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+    default:
+      st = db->st._unk_proto_st;
+      return (u32) ~ 0;
+    }
+
+  return ste - st;
+}
+
+nat64_db_st_entry_t *
+nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index)
+{
+  nat64_db_st_entry_t *st;
+
+  switch (ip_proto_to_snat_proto (proto))
+    {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+    case SNAT_PROTOCOL_##N: \
+      st = db->st._##n##_st; \
+      break;
+      foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+    default:
+      st = db->st._unk_proto_st;
+      break;
+    }
+
+  return pool_elt_at_index (st, ste_index);
+}
+
 void
 nad64_db_st_free_expired (nat64_db_t * db, u32 now)
 {
 void
 nad64_db_st_free_expired (nat64_db_t * db, u32 now)
 {
index 394ca87..94d9a8b 100644 (file)
@@ -296,6 +296,27 @@ void nad64_db_st_free_expired (nat64_db_t * db, u32 now);
  */
 void nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr);
 
  */
 void nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr);
 
+/**
+ * @brief Get ST entry index.
+ *
+ * @param db NAT64 DB.
+ * @param ste ST entry.
+ *
+ * @return ST entry index on success, ~0 otherwise.
+ */
+u32 nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste);
+
+/**
+ * @brief Get ST entry by index and protocol.
+ *
+ * @param db NAT64 DB.
+ * @param proto L4 protocol.
+ * @param ste_index ST entry index.
+ *
+ * @return ST entry if found.
+ */
+nat64_db_st_entry_t *nat64_db_st_entry_by_index (nat64_db_t * db,
+                                                u8 proto, u32 ste_index);
 #endif /* __included_nat64_db_h__ */
 
 /*
 #endif /* __included_nat64_db_h__ */
 
 /*
index f78baff..4f94575 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <nat/nat64.h>
  */
 
 #include <nat/nat64.h>
+#include <nat/nat_reass.h>
 #include <vnet/ip/ip6_to_ip4.h>
 #include <vnet/fib/fib_table.h>
 
 #include <vnet/ip/ip6_to_ip4.h>
 #include <vnet/fib/fib_table.h>
 
@@ -45,14 +46,42 @@ format_nat64_in2out_trace (u8 * s, va_list * args)
   return s;
 }
 
   return s;
 }
 
+typedef struct
+{
+  u32 sw_if_index;		/* RX sw_if_index of the traced buffer */
+  u32 next_index;		/* next-node index selected for the buffer */
+  u8 cached;			/* non-zero: fragment was cached, not forwarded */
+} nat64_in2out_reass_trace_t;
+
+/*
+ * Trace formatter for the nat64-in2out-reass node.
+ *
+ * Takes (vm, node, trace record) from the va_list and appends one line with
+ * the RX interface, the chosen next index and whether the fragment was
+ * cached or translated.
+ */
+static u8 *
+format_nat64_in2out_reass_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat64_in2out_reass_trace_t *trace =
+    va_arg (*args, nat64_in2out_reass_trace_t *);
+  char *status = "translated";
+
+  if (trace->cached)
+    status = "cached";
+
+  return format (s,
+                "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
+                trace->sw_if_index, trace->next_index, status);
+}
+
 vlib_node_registration_t nat64_in2out_node;
 vlib_node_registration_t nat64_in2out_slowpath_node;
 vlib_node_registration_t nat64_in2out_node;
 vlib_node_registration_t nat64_in2out_slowpath_node;
+vlib_node_registration_t nat64_in2out_reass_node;
+
+#define foreach_nat64_in2out_error                       \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
+_(IN2OUT_PACKETS, "good in2out packets processed")       \
+_(NO_TRANSLATION, "no translation")                      \
+_(UNKNOWN, "unknown")                                    \
+_(DROP_FRAGMENT, "Drop fragment")                        \
+_(MAX_REASS, "Maximum reassemblies exceeded")            \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 
-#define foreach_nat64_in2out_error                 \
-_(UNSUPPORTED_PROTOCOL, "unsupported protocol")    \
-_(IN2OUT_PACKETS, "good in2out packets processed") \
-_(NO_TRANSLATION, "no translation")                \
-_(UNKNOWN, "unknown")
 
 typedef enum
 {
 
 typedef enum
 {
@@ -74,6 +103,7 @@ typedef enum
   NAT64_IN2OUT_NEXT_IP6_LOOKUP,
   NAT64_IN2OUT_NEXT_DROP,
   NAT64_IN2OUT_NEXT_SLOWPATH,
   NAT64_IN2OUT_NEXT_IP6_LOOKUP,
   NAT64_IN2OUT_NEXT_DROP,
   NAT64_IN2OUT_NEXT_SLOWPATH,
+  NAT64_IN2OUT_NEXT_REASS,
   NAT64_IN2OUT_N_NEXT,
 } nat64_in2out_next_t;
 
   NAT64_IN2OUT_N_NEXT,
 } nat64_in2out_next_t;
 
@@ -936,13 +966,6 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
 
          proto0 = ip_proto_to_snat_proto (l4_protocol0);
            }
 
          proto0 = ip_proto_to_snat_proto (l4_protocol0);
-         if (frag_offset0 != 0)
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error =
-               node->errors[NAT64_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
-             goto trace0;
-           }
 
          if (is_slow_path)
            {
 
          if (is_slow_path)
            {
@@ -979,6 +1002,13 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                }
            }
 
                }
            }
 
+         if (PREDICT_FALSE
+             (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
+           {
+             next0 = NAT64_IN2OUT_NEXT_REASS;
+             goto trace0;
+           }
+
          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              if (is_hairpinning (&ip60->dst_address))
          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              if (is_hairpinning (&ip60->dst_address))
@@ -1073,6 +1103,7 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
   },
 };
 /* *INDENT-ON* */
@@ -1102,6 +1133,7 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
   },
 };
 /* *INDENT-ON* */
@@ -1109,6 +1141,455 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
 
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
 
+typedef struct nat64_in2out_frag_set_ctx_t_
+{
+  vlib_main_t *vm;		/* passed to nat64_session_reset_timeout */
+  u32 sess_index;		/* ST entry index taken from the reassembly */
+  u16 l4_offset;		/* byte offset of the L4 header from the IPv6 header */
+  u8 proto;			/* L4 protocol (IP protocol number) */
+  u8 first_frag;		/* 1 when the fragment offset is zero */
+} nat64_in2out_frag_set_ctx_t;
+
+/*
+ * Address/port callback handed to ip6_to_ip4_fragmented for in2out
+ * fragments.
+ *
+ * Resolves the session and its BIB entry from the context, refreshes the
+ * session timeout and writes the translated IPv4 source/destination
+ * addresses.  On the first fragment the L4 source port is rewritten and,
+ * for TCP, the checksum is updated incrementally.
+ *
+ * NOTE(review): only the TCP checksum is adjusted here; confirm how the UDP
+ * checksum of a first fragment is expected to be fixed up.
+ *
+ * @return 0 on success, -1 when the session or BIB entry is not found.
+ */
+static int
+nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
+{
+  nat64_in2out_frag_set_ctx_t *ctx = arg;
+  nat64_main_t *nm = &nat64_main;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  if (ctx->first_frag)
+    {
+      udp_header_t *l4 = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
+
+      if (ctx->proto == IP_PROTOCOL_TCP)
+       {
+         tcp_header_t *tcp = (tcp_header_t *) l4;
+         ip_csum_t sum;
+
+         /* Remove the old source port and IPv6 pseudo-header addresses,
+          * then add the translated port and IPv4 addresses. */
+         sum = ip_csum_sub_even (tcp->checksum, tcp->src_port);
+         sum = ip_csum_sub_even (sum, ip6->src_address.as_u64[0]);
+         sum = ip_csum_sub_even (sum, ip6->src_address.as_u64[1]);
+         sum = ip_csum_sub_even (sum, ip6->dst_address.as_u64[0]);
+         sum = ip_csum_sub_even (sum, ip6->dst_address.as_u64[1]);
+         sum = ip_csum_add_even (sum, bibe->out_port);
+         sum = ip_csum_add_even (sum, bibe->out_addr.as_u32);
+         sum = ip_csum_add_even (sum, ste->out_r_addr.as_u32);
+         tcp->checksum = ip_csum_fold (sum);
+       }
+
+      /* Must follow the checksum update, which reads the old port. */
+      l4->src_port = bibe->out_port;
+    }
+
+  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
+  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
+
+  return 0;
+}
+
+/*
+ * Rewrite a hairpinned in2out fragment in place; the packet stays IPv6.
+ *
+ * The source address becomes the IPv6 form (nat64_compose_ip6) of the
+ * session's outside IPv4 address and the destination becomes the inside
+ * address of the BIB entry the packet is hairpinned to.  On the first
+ * fragment the L4 ports are rewritten as well and the L4 checksum is
+ * updated incrementally.
+ *
+ * @param b   Buffer carrying the fragment (currently unused).
+ * @param ip6 IPv6 header of the fragment, modified in place.
+ * @param ctx Session index, L4 protocol/offset and first-fragment flag.
+ *
+ * @return 0 on success, -1 when a required session or BIB entry is missing
+ *         or the reverse-direction session cannot be created.
+ */
+static int
+nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
+                              nat64_in2out_frag_set_ctx_t * ctx)
+{
+  nat64_main_t *nm = &nat64_main;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  u16 sport = udp->src_port;
+  u16 dport = udp->dst_port;
+  /* Only used on the first fragment; initialised so the other path never
+   * carries indeterminate values. */
+  u16 *checksum = 0;
+  ip_csum_t csum = 0;
+  ip46_address_t saddr, daddr;
+
+  if (ctx->first_frag)
+    {
+      if (ctx->proto == IP_PROTOCOL_UDP)
+       checksum = &udp->checksum;
+      else
+       checksum = &tcp->checksum;
+
+      /* Take the original addresses and ports out of the checksum before
+       * any of them is overwritten. */
+      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, sport);
+      csum = ip_csum_sub_even (csum, dport);
+    }
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  sport = bibe->out_port;
+  dport = ste->r_port;
+
+  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
+
+  memset (&saddr, 0, sizeof (saddr));
+  memset (&daddr, 0, sizeof (daddr));
+  saddr.ip4.as_u32 = bibe->out_addr.as_u32;
+  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
+
+  /* Find (or create) the session for the out2in leg of the hairpin. */
+  ste =
+    nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, ctx->proto,
+                           0, 0);
+
+  if (ste)
+    {
+      bibe =
+       nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+      if (!bibe)
+       return -1;
+    }
+  else
+    {
+      bibe =
+       nat64_db_bib_entry_find (&nm->db, &daddr, dport, ctx->proto, 0, 0);
+
+      if (!bibe)
+       return -1;
+
+      ste =
+       nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
+                                 &saddr.ip4, sport);
+      /* Same handling as the other nat64_db_st_entry_create call sites:
+       * a failed session creation means no translation. */
+      if (!ste)
+       return -1;
+    }
+
+  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+  if (ctx->first_frag)
+    {
+      udp->dst_port = bibe->in_port;
+      udp->src_port = sport;
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, udp->src_port);
+      csum = ip_csum_add_even (csum, udp->dst_port);
+      *checksum = ip_csum_fold (csum);
+    }
+
+  return 0;
+}
+
+static uword
+nat64_in2out_reass_node_fn (vlib_main_t * vm,
+                           vlib_node_runtime_t * node, vlib_frame_t * frame)
+{				/* node function of "nat64-in2out-reass": translates or caches IPv6 fragments */
+  u32 n_left_from, *from, *to_next;
+  nat64_in2out_next_t next_index;
+  u32 pkts_processed = 0;
+  u32 *fragments_to_drop = 0;	/* filled by nat_ip6_reass_find_or_create, dropped at the end */
+  u32 *fragments_to_loopback = 0;	/* filled by nat_ip6_reass_get_frags, re-fed into the input below */
+  nat64_main_t *nm = &nat64_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         u8 cached0 = 0;	/* set when b0 is stored in the reassembly instead of forwarded */
+         ip6_header_t *ip60;
+         u16 l4_offset0, frag_offset0;
+         u8 l4_protocol0;
+         nat_reass_ip6_t *reass0;
+         ip6_frag_hdr_t *frag0;
+         nat64_db_bib_entry_t *bibe0;
+         nat64_db_st_entry_t *ste0;
+         udp_header_t *udp0;
+         snat_protocol_t proto0;
+         u32 sw_if_index0, fib_index0;
+         ip46_address_t saddr0, daddr0;
+         nat64_in2out_frag_set_ctx_t ctx0;
+
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
+
+         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         fib_index0 =
+           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
+                                                sw_if_index0);
+
+         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))	/* NOTE(review): presumably 1 selects the IPv6 setting - confirm in nat_reass.h */
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
+
+         if (PREDICT_FALSE
+             (ip6_parse
+              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+               &frag_offset0)))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
+             goto trace0;
+           }
+
+         if (PREDICT_FALSE
+             (!(l4_protocol0 == IP_PROTOCOL_TCP
+                || l4_protocol0 == IP_PROTOCOL_UDP)))	/* only TCP and UDP fragments are handled */
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
+         frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
+         proto0 = ip_proto_to_snat_proto (l4_protocol0);
+
+         reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
+                                                ip60->dst_address,
+                                                frag0->identification,
+                                                l4_protocol0,
+                                                1, &fragments_to_drop);
+
+         if (PREDICT_FALSE (!reass0))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
+             goto trace0;
+           }
+
+         if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))	/* non-zero offset: not the first fragment */
+           {
+             ctx0.first_frag = 0;
+             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))	/* no session recorded for this reassembly yet */
+               {
+                 if (nat_ip6_reass_add_fragment (reass0, bi0))
+                   {
+                     b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
+                     next0 = NAT64_IN2OUT_NEXT_DROP;
+                     goto trace0;
+                   }
+                 cached0 = 1;	/* buffer is now held by the reassembly */
+                 goto trace0;
+               }
+           }
+         else
+           {
+             ctx0.first_frag = 1;	/* offset zero: this fragment carries the L4 header */
+
+             saddr0.as_u64[0] = ip60->src_address.as_u64[0];
+             saddr0.as_u64[1] = ip60->src_address.as_u64[1];
+             daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
+             daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
+
+             ste0 =
+               nat64_db_st_entry_find (&nm->db, &saddr0, &daddr0,
+                                       udp0->src_port, udp0->dst_port,
+                                       l4_protocol0, fib_index0, 1);
+             if (!ste0)
+               {
+                 bibe0 =
+                   nat64_db_bib_entry_find (&nm->db, &saddr0, udp0->src_port,
+                                            l4_protocol0, fib_index0, 1);
+                 if (!bibe0)
+                   {
+                     u16 out_port0;
+                     ip4_address_t out_addr0;
+                     if (nat64_alloc_out_addr_and_port
+                         (fib_index0, proto0, &out_addr0, &out_port0))
+                       {
+                         next0 = NAT64_IN2OUT_NEXT_DROP;
+                         b0->error =
+                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                         goto trace0;
+                       }
+
+                     bibe0 =
+                       nat64_db_bib_entry_create (&nm->db,
+                                                  &ip60->src_address,
+                                                  &out_addr0, udp0->src_port,
+                                                  clib_host_to_net_u16
+                                                  (out_port0), fib_index0,
+                                                  l4_protocol0, 0);
+                     if (!bibe0)
+                       {
+                         next0 = NAT64_IN2OUT_NEXT_DROP;
+                         b0->error =
+                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                         goto trace0;
+                       }
+                   }
+                 nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
+                                    fib_index0);
+                 ste0 =
+                   nat64_db_st_entry_create (&nm->db, bibe0,
+                                             &ip60->dst_address, &daddr0.ip4,
+                                             udp0->dst_port);
+                 if (!ste0)
+                   {
+                     next0 = NAT64_IN2OUT_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+               }
+             reass0->sess_index =
+               nat64_db_st_entry_get_index (&nm->db, ste0);	/* later fragments find the session through the reassembly */
+
+             nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);	/* collect fragments cached before the first one arrived */
+           }
+
+         ctx0.sess_index = reass0->sess_index;
+         ctx0.proto = l4_protocol0;
+         ctx0.vm = vm;
+         ctx0.l4_offset = l4_offset0;
+
+         if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
+           {
+             next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;	/* hairpinned packets stay IPv6 */
+             if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
+               {
+                 next0 = NAT64_IN2OUT_NEXT_DROP;
+                 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+               }
+             goto trace0;
+           }
+         else
+           {
+             if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
+               {
+                 next0 = NAT64_IN2OUT_NEXT_DROP;
+                 b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
+                 goto trace0;
+               }
+           }
+
+       trace0:
+         if (PREDICT_FALSE
+             ((node->flags & VLIB_NODE_FLAG_TRACE)
+              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             nat64_in2out_reass_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->cached = cached0;
+             t->sw_if_index = sw_if_index0;
+             t->next_index = next0;
+           }
+
+         if (cached0)
+           {
+             n_left_to_next++;	/* undo the speculative enqueue: a cached buffer is not forwarded */
+             to_next--;
+           }
+         else
+           {
+             pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;	/* counts every forwarded (non-dropped) buffer */
+
+             /* verify speculative enqueue, maybe switch current next frame */
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                              to_next, n_left_to_next,
+                                              bi0, next0);
+           }
+
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))	/* input exhausted: refill it with released cached fragments */
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback,
+                              sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                              fragments_to_loopback + (len -
+                                                       VLIB_FRAME_SIZE),
+                              sizeof (u32) * VLIB_FRAME_SIZE);	/* take the last VLIB_FRAME_SIZE entries, keep the rest queued */
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
+                              NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
+                              pkts_processed);
+
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                       &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
+                       NAT64_IN2OUT_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
+  .function = nat64_in2out_reass_node_fn,
+  .name = "nat64-in2out-reass",	/* name the other in2out nodes use for NAT64_IN2OUT_NEXT_REASS */
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat64_in2out_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
+  .error_strings = nat64_in2out_error_strings,
+  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
+  /* next nodes, indexed by nat64_in2out_next_t; edit / add dispositions here */
+  .next_nodes = {
+    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
+    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
+                             nat64_in2out_reass_node_fn);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 61e88a7..eb5ecb4 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <nat/nat64.h>
  */
 
 #include <nat/nat64.h>
+#include <nat/nat_reass.h>
 #include <vnet/ip/ip4_to_ip6.h>
 #include <vnet/fib/ip4_fib.h>
 
 #include <vnet/ip/ip4_to_ip6.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -41,13 +42,41 @@ format_nat64_out2in_trace (u8 * s, va_list * args)
   return s;
 }
 
   return s;
 }
 
+typedef struct
+{
+  u32 sw_if_index;		/* interface index printed by the trace formatter */
+  u32 next_index;		/* next-node index printed by the trace formatter */
+  u8 cached;			/* non-zero is printed as "cached", zero as "translated" */
+} nat64_out2in_reass_trace_t;
+
+/*
+ * Trace formatter for the nat64-out2in-reass node.
+ *
+ * Takes (vm, node, trace record) from the va_list and appends one line with
+ * the interface index, the chosen next index and whether the fragment was
+ * cached or translated.
+ */
+static u8 *
+format_nat64_out2in_reass_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat64_out2in_reass_trace_t *trace =
+    va_arg (*args, nat64_out2in_reass_trace_t *);
+  char *status = "translated";
+
+  if (trace->cached)
+    status = "cached";
+
+  return format (s,
+                "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
+                trace->sw_if_index, trace->next_index, status);
+}
+
 vlib_node_registration_t nat64_out2in_node;
 vlib_node_registration_t nat64_out2in_node;
+vlib_node_registration_t nat64_out2in_reass_node;
+
+#define foreach_nat64_out2in_error                       \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")          \
+_(OUT2IN_PACKETS, "Good out2in packets processed")       \
+_(NO_TRANSLATION, "No translation")                      \
+_(UNKNOWN, "unknown")                                    \
+_(DROP_FRAGMENT, "Drop fragment")                        \
+_(MAX_REASS, "Maximum reassemblies exceeded")            \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 
-#define foreach_nat64_out2in_error                 \
-_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")    \
-_(OUT2IN_PACKETS, "Good out2in packets processed") \
-_(NO_TRANSLATION, "No translation")                \
-_(UNKNOWN, "unknown")
 
 typedef enum
 {
 
 typedef enum
 {
@@ -67,6 +96,7 @@ typedef enum
 {
   NAT64_OUT2IN_NEXT_LOOKUP,
   NAT64_OUT2IN_NEXT_DROP,
 {
   NAT64_OUT2IN_NEXT_LOOKUP,
   NAT64_OUT2IN_NEXT_DROP,
+  NAT64_OUT2IN_NEXT_REASS,
   NAT64_OUT2IN_N_NEXT,
 } nat64_out2in_next_t;
 
   NAT64_OUT2IN_N_NEXT,
 } nat64_out2in_next_t;
 
@@ -412,20 +442,27 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          proto0 = ip_proto_to_snat_proto (ip40->protocol);
 
 
          proto0 = ip_proto_to_snat_proto (ip40->protocol);
 
-         if (proto0 == SNAT_PROTOCOL_ICMP)
+         if (PREDICT_FALSE (proto0 == ~0))
            {
            {
-             if (icmp_to_icmp6
-                 (b0, nat64_out2in_icmp_set_cb, &ctx0,
-                  nat64_out2in_inner_icmp_set_cb, &ctx0))
+             if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
-                 goto trace0;
                }
                }
+             goto trace0;
            }
            }
-         else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+
+         if (PREDICT_FALSE (ip4_is_fragment (ip40)))
            {
            {
-             if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
+             next0 = NAT64_OUT2IN_NEXT_REASS;
+             goto trace0;
+           }
+
+         if (proto0 == SNAT_PROTOCOL_ICMP)
+           {
+             if (icmp_to_icmp6
+                 (b0, nat64_out2in_icmp_set_cb, &ctx0,
+                  nat64_out2in_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
@@ -434,7 +471,7 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
          else
            {
            }
          else
            {
-             if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
+             if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
@@ -474,17 +511,361 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = {
   .format_trace = format_nat64_out2in_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
   .format_trace = format_nat64_out2in_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
-  .error_strings = nat64_out2in_error_strings,.n_next_nodes = 2,
+  .error_strings = nat64_out2in_error_strings,
+  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
   /* edit / add dispositions here */
   .next_nodes = {
     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
     [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
   /* edit / add dispositions here */
   .next_nodes = {
     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
     [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
+    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
   },
 };
 /* *INDENT-ON* */
 
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
 
   },
 };
 /* *INDENT-ON* */
 
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
 
+typedef struct nat64_out2in_frag_set_ctx_t_
+{
+  vlib_main_t *vm;		/* passed to nat64_session_reset_timeout */
+  vlib_buffer_t *b;		/* buffer whose sw_if_index[VLIB_TX] the callback sets */
+  u32 sess_index;		/* ST entry index used to look up the session */
+  u8 proto;			/* L4 protocol used to select the session/BIB pool */
+  u8 first_frag;		/* non-zero: rewrite the L4 port and checksum too */
+} nat64_out2in_frag_set_ctx_t;
+
+/*
+ * Address/port callback for translating an out2in IPv4 fragment to IPv6.
+ *
+ * Resolves the session and its BIB entry from the context, refreshes the
+ * session timeout, writes the IPv6 source/destination addresses and sets
+ * the buffer's TX FIB index.  On the first fragment the L4 destination port
+ * is rewritten and the L4 checksum is fixed up: incrementally when a
+ * checksum is present, from scratch when an IPv4 UDP packet carries a zero
+ * checksum.
+ *
+ * @param ip4 IPv4 header of the fragment.
+ * @param ip6 IPv6 header being built.
+ * @param arg nat64_out2in_frag_set_ctx_t describing the fragment.
+ *
+ * @return 0 on success, -1 when the session or BIB entry is not found.
+ */
+static int
+nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+{
+  nat64_main_t *nm = &nat64_main;
+  nat64_out2in_frag_set_ctx_t *ctx = arg;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+  udp_header_t *udp = ip4_next_header (ip4);
+  ip_csum_t csum;
+  u16 *checksum;
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  if (ctx->first_frag)
+    {
+      udp->dst_port = bibe->in_port;
+
+      if (ip4->protocol == IP_PROTOCOL_UDP)
+       checksum = &udp->checksum;
+      else
+       {
+         tcp_header_t *tcp = ip4_next_header (ip4);
+         checksum = &tcp->checksum;
+       }
+
+      /* Test the checksum VALUE: the field's address is never null, so
+       * testing the pointer left this branch unreachable. */
+      if (ip4->protocol == IP_PROTOCOL_UDP && !*checksum)
+       {
+         /* NOTE(review): udp_len is derived from this packet's IPv4 length,
+          * so the sum covers only the bytes present in this fragment -
+          * confirm this is acceptable for multi-fragment datagrams. */
+         u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
+         csum = ip_incremental_checksum (0, udp, udp_len);
+         csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+         csum =
+           ip_csum_with_carry (csum,
+                               clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+         csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
+         csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
+         csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
+         csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
+         *checksum = ~ip_csum_fold (csum);
+       }
+      else
+       {
+         /* Swap the IPv4 pseudo-header addresses and outside port for the
+          * IPv6 addresses and inside port. */
+         csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
+         csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
+         csum = ip_csum_sub_even (csum, bibe->out_port);
+         csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
+         csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
+         csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
+         csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
+         csum = ip_csum_add_even (csum, bibe->in_port);
+         *checksum = ip_csum_fold (csum);
+       }
+    }
+
+  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+
+  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+  return 0;
+}
+
+static uword
+nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  nat64_out2in_next_t next_index;
+  u32 pkts_processed = 0;
+  u32 *fragments_to_drop = 0;
+  u32 *fragments_to_loopback = 0;
+  nat64_main_t *nm = &nat64_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         ip4_header_t *ip40;
+         u8 cached0 = 0;
+         u32 sw_if_index0, fib_index0;
+         udp_header_t *udp0;
+         nat_reass_ip4_t *reass0;
+         ip46_address_t saddr0, daddr0;
+         nat64_db_st_entry_t *ste0;
+         nat64_db_bib_entry_t *bibe0;
+         ip6_address_t ip6_saddr0;
+         nat64_out2in_frag_set_ctx_t ctx0;
+
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         next0 = NAT64_OUT2IN_NEXT_LOOKUP;
+
+         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         fib_index0 =
+           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                sw_if_index0);
+
+         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         ip40 = vlib_buffer_get_current (b0);
+
+         if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
+                              || ip40->protocol == IP_PROTOCOL_UDP)))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         udp0 = ip4_next_header (ip40);
+
+         reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
+                                                ip40->dst_address,
+                                                ip40->fragment_id,
+                                                ip40->protocol,
+                                                1, &fragments_to_drop);
+
+         if (PREDICT_FALSE (!reass0))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
+             goto trace0;
+           }
+
+         if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
+           {
+             ctx0.first_frag = 1;
+
+             memset (&saddr0, 0, sizeof (saddr0));
+             saddr0.ip4.as_u32 = ip40->src_address.as_u32;
+             memset (&daddr0, 0, sizeof (daddr0));
+             daddr0.ip4.as_u32 = ip40->dst_address.as_u32;
+
+             ste0 =
+               nat64_db_st_entry_find (&nm->db, &daddr0, &saddr0,
+                                       udp0->dst_port, udp0->src_port,
+                                       ip40->protocol, fib_index0, 0);
+             if (!ste0)
+               {
+                 bibe0 =
+                   nat64_db_bib_entry_find (&nm->db, &daddr0, udp0->dst_port,
+                                            ip40->protocol, fib_index0, 0);
+                 if (!bibe0)
+                   {
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+
+                 nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
+                                    bibe0->fib_index);
+                 ste0 =
+                   nat64_db_st_entry_create (&nm->db, bibe0, &ip6_saddr0,
+                                             &saddr0.ip4, udp0->src_port);
+
+                 if (!ste0)
+                   {
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+               }
+             reass0->sess_index =
+               nat64_db_st_entry_get_index (&nm->db, ste0);
+
+             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
+           }
+         else
+           {
+             ctx0.first_frag = 0;
+
+             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
+               {
+                 if (nat_ip4_reass_add_fragment (reass0, bi0))
+                   {
+                     b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     goto trace0;
+                   }
+                 cached0 = 1;
+                 goto trace0;
+               }
+           }
+
+         ctx0.sess_index = reass0->sess_index;
+         ctx0.proto = ip40->protocol;
+         ctx0.vm = vm;
+         ctx0.b = b0;
+
+         if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
+             goto trace0;
+           }
+
+       trace0:
+         if (PREDICT_FALSE
+             ((node->flags & VLIB_NODE_FLAG_TRACE)
+              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             nat64_out2in_reass_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->cached = cached0;
+             t->sw_if_index = sw_if_index0;
+             t->next_index = next0;
+           }
+
+         if (cached0)
+           {
+             n_left_to_next++;
+             to_next--;
+           }
+         else
+           {
+             pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
+
+             /* verify speculative enqueue, maybe switch current next frame */
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                              to_next, n_left_to_next,
+                                              bi0, next0);
+           }
+
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback,
+                              sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                              fragments_to_loopback + (len -
+                                                       VLIB_FRAME_SIZE),
+                              sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
+                              NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
+                              pkts_processed);
+
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                       &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
+                       NAT64_OUT2IN_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Graph node registration for "nat64-out2in-reass", whose dispatch
+ * function is nat64_out2in_reass_node_fn.  It uses the nat64_out2in
+ * error strings and the NAT64_OUT2IN_NEXT_* dispositions.
+ */
+VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
+  .function = nat64_out2in_reass_node_fn,
+  .name = "nat64-out2in-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat64_out2in_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
+  .error_strings = nat64_out2in_error_strings,
+  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
+    [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
+    /* the REASS disposition names this node itself */
+    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * Pass the node and its dispatch function to
+ * VLIB_NODE_FUNCTION_MULTIARCH (presumably per-CPU-variant selection --
+ * confirm against the macro definition).
+ */
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
+                             nat64_out2in_reass_node_fn);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 0ffa2f0..548a9e0 100644 (file)
@@ -22,6 +22,7 @@
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
+#include <nat/nat_reass.h>
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
 
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
 
@@ -1496,6 +1497,146 @@ vl_api_nat_ipfix_enable_disable_t_print (vl_api_nat_ipfix_enable_disable_t *
   FINISH;
 }
 
   FINISH;
 }
 
+/*
+ * API handler for nat_set_reass: convert the multi-byte request fields
+ * from network byte order, apply them through nat_reass_set() and send
+ * the reply.
+ */
+static void
+vl_api_nat_set_reass_t_handler (vl_api_nat_set_reass_t * mp)
+{
+  /* sm and rmp are not used directly below; presumably REPLY_MACRO
+   * picks them (and rv) up by name -- confirm against the macro. */
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_set_reass_reply_t *rmp;
+  u32 timeout = ntohl (mp->timeout);
+  u16 max_reass = ntohs (mp->max_reass);
+  int rv;
+
+  rv = nat_reass_set (timeout, max_reass, mp->max_frag, mp->drop_frag,
+                     mp->is_ip6);
+
+  REPLY_MACRO (VL_API_NAT_SET_REASS_REPLY);
+}
+
+/*
+ * Render a nat_set_reass request as a "SCRIPT:" line.
+ *
+ * mp->timeout and mp->max_reass arrive in network byte order (the
+ * handler decodes them with ntohl/ntohs), so they are converted with the
+ * net-to-host helpers before being printed.  The single-byte fields need
+ * no conversion.  The formatted vector s and handle are handed to the
+ * FINISH macro.
+ */
+static void *
+vl_api_nat_set_reass_t_print (vl_api_nat_set_reass_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_set_reass ");
+  s = format (s, "timeout %d max_reass %d max_frag %d drop_frag %d is_ip6 %d",
+             clib_net_to_host_u32 (mp->timeout),
+             clib_net_to_host_u16 (mp->max_reass),
+             mp->max_frag, mp->drop_frag, mp->is_ip6);
+
+  FINISH;
+}
+
+/*
+ * API handler for nat_get_reass: reply with the current IPv4 and IPv6
+ * virtual-reassembly settings.  Getter argument 0 selects IPv4 and 1
+ * selects IPv6.  Timeouts and max_reass are converted to network byte
+ * order; the max_frag and drop_frag values are copied as-is.
+ */
+static void
+vl_api_nat_get_reass_t_handler (vl_api_nat_get_reass_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_get_reass_reply_t *rmp;
+  int rv = 0;
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_NAT_GET_REASS_REPLY,
+  ({
+    rmp->ip4_timeout = htonl (nat_reass_get_timeout(0));
+    rmp->ip4_max_reass = htons (nat_reass_get_max_reass(0));
+    rmp->ip4_max_frag = nat_reass_get_max_frag(0);
+    rmp->ip4_drop_frag = nat_reass_is_drop_frag(0);
+    rmp->ip6_timeout = htonl (nat_reass_get_timeout(1));
+    rmp->ip6_max_reass = htons (nat_reass_get_max_reass(1));
+    rmp->ip6_max_frag = nat_reass_get_max_frag(1);
+    rmp->ip6_drop_frag = nat_reass_is_drop_frag(1);
+  }))
+  /* *INDENT-ON* */
+}
+
+/*
+ * Render a nat_get_reass request as a "SCRIPT:" line; the request has
+ * no parameters to print.  s and handle are handed to the FINISH macro.
+ */
+static void *
+vl_api_nat_get_reass_t_print (vl_api_nat_get_reass_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_get_reass");
+
+  FINISH;
+}
+
+/* Context handed to the reassembly walk callbacks of the dump handler. */
+typedef struct nat_api_walk_ctx_t_
+{
+  /* client input queue the details messages are sent to */
+  unix_shared_memory_queue_t *q;
+  /* context value copied from the dump request into every details message */
+  u32 context;
+} nat_api_walk_ctx_t;
+
+/*
+ * Walk callback: emit one nat_reass_details message for an IPv4
+ * reassembly entry on the queue carried in arg (a nat_api_walk_ctx_t).
+ * Always returns 0, so the walk continues.
+ */
+static int
+nat_ip4_reass_walk_api (nat_reass_ip4_t * reass, void *arg)
+{
+  nat_api_walk_ctx_t *walk = arg;
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_reass_details_t *details;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  /* message header */
+  details->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
+  details->context = walk->context;
+
+  /* reassembly key: only the first 4 bytes of each address field are used */
+  details->is_ip4 = 1;
+  clib_memcpy (details->src_addr, &(reass->key.src), 4);
+  clib_memcpy (details->dst_addr, &(reass->key.dst), 4);
+  details->proto = reass->key.proto;
+  details->frag_id = ntohl (reass->key.frag_id);
+
+  /* number of fragments currently cached for this reassembly */
+  details->frag_n = reass->frag_n;
+
+  vl_msg_api_send_shmem (walk->q, (u8 *) & details);
+
+  return 0;
+}
+
+/*
+ * Walk callback: emit one nat_reass_details message for an IPv6
+ * reassembly entry on the queue carried in arg (a nat_api_walk_ctx_t).
+ * Always returns 0, so the walk continues.
+ */
+static int
+nat_ip6_reass_walk_api (nat_reass_ip6_t * reass, void *arg)
+{
+  nat_api_walk_ctx_t *walk = arg;
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_reass_details_t *details;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  /* message header */
+  details->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
+  details->context = walk->context;
+
+  /* reassembly key: full 16-byte addresses */
+  details->is_ip4 = 0;
+  clib_memcpy (details->src_addr, &(reass->key.src), 16);
+  clib_memcpy (details->dst_addr, &(reass->key.dst), 16);
+  details->proto = reass->key.proto;
+  details->frag_id = ntohl (reass->key.frag_id);
+
+  /* number of fragments currently cached for this reassembly */
+  details->frag_n = reass->frag_n;
+
+  vl_msg_api_send_shmem (walk->q, (u8 *) & details);
+
+  return 0;
+}
+
+/*
+ * API handler for nat_reass_dump: look up the requesting client's input
+ * queue and walk the IPv4 and then the IPv6 reassemblies, sending one
+ * details message per entry.  Nothing is sent when the client has no
+ * queue.
+ */
+static void
+vl_api_nat_reass_dump_t_handler (vl_api_nat_reass_dump_t * mp)
+{
+  nat_api_walk_ctx_t walk_ctx;
+
+  walk_ctx.q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!walk_ctx.q)
+    return;
+  walk_ctx.context = mp->context;
+
+  nat_ip4_reass_walk (nat_ip4_reass_walk_api, &walk_ctx);
+  nat_ip6_reass_walk (nat_ip6_reass_walk_api, &walk_ctx);
+}
+
+/*
+ * Render a nat_reass_dump request as a "SCRIPT:" line; the request has
+ * no parameters to print.  s and handle are handed to the FINISH macro.
+ */
+static void *
+vl_api_nat_reass_dump_t_print (vl_api_nat_reass_dump_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_reass_dump");
+
+  FINISH;
+}
+
 /*************/
 /*** NAT44 ***/
 /*************/
 /*************/
 /*** NAT44 ***/
 /*************/
@@ -3406,6 +3547,9 @@ _(NAT_SHOW_CONFIG, nat_show_config)                                     \
 _(NAT_SET_WORKERS, nat_set_workers)                                     \
 _(NAT_WORKER_DUMP, nat_worker_dump)                                     \
 _(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable)                   \
 _(NAT_SET_WORKERS, nat_set_workers)                                     \
 _(NAT_WORKER_DUMP, nat_worker_dump)                                     \
 _(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable)                   \
+_(NAT_SET_REASS, nat_set_reass)                                         \
+_(NAT_GET_REASS, nat_get_reass)                                         \
+_(NAT_REASS_DUMP, nat_reass_dump)                                       \
 _(NAT44_ADD_DEL_ADDRESS_RANGE, nat44_add_del_address_range)             \
 _(NAT44_INTERFACE_ADD_DEL_FEATURE, nat44_interface_add_del_feature)     \
 _(NAT44_ADD_DEL_STATIC_MAPPING, nat44_add_del_static_mapping)           \
 _(NAT44_ADD_DEL_ADDRESS_RANGE, nat44_add_del_address_range)             \
 _(NAT44_INTERFACE_ADD_DEL_FEATURE, nat44_interface_add_del_feature)     \
 _(NAT44_ADD_DEL_STATIC_MAPPING, nat44_add_del_static_mapping)           \
diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c
new file mode 100644 (file)
index 0000000..239bc70
--- /dev/null
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin virtual fragmentation reassembly
+ */
+
+#include <vnet/vnet.h>
+#include <nat/nat_reass.h>
+
+nat_reass_main_t nat_reass_main;
+
+/*
+ * Compute the hash bucket count for the IPv4 (is_ip6 == 0) or IPv6
+ * reassembly table: the configured maximum number of reassemblies
+ * divided by NAT_REASS_HT_LOAD_FACTOR, rounded up to a power of two.
+ */
+static u32
+nat_reass_get_nbuckets (u8 is_ip6)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+  u32 wanted;
+  u8 shift = 0;
+
+  if (is_ip6)
+    wanted = (u32) (rm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
+  else
+    wanted = (u32) (rm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
+
+  /* smallest power of two that is >= wanted, probing shifts 0..30 */
+  while (shift < 31 && (1 << shift) < wanted)
+    shift++;
+
+  return 1 << shift;
+}
+
+/*
+ * Drain the per-reassembly fragment list of an IPv4 reassembly: every
+ * cached buffer index is appended to the vector *bi, the list element is
+ * returned to the pool and frag_n is decremented.  No locking is done
+ * here.
+ */
+static_always_inline void
+nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  for (;;)
+    {
+      dlist_elt_t *frag;
+      u32 frag_index =
+       clib_dlist_remove_head (rm->ip4_frags_list_pool,
+                               reass->frags_per_reass_list_head_index);
+
+      /* ~0 signals an empty list */
+      if (frag_index == ~0)
+       break;
+
+      frag = pool_elt_at_index (rm->ip4_frags_list_pool, frag_index);
+      vec_add1 (*bi, frag->value);
+      reass->frag_n--;
+      pool_put_index (rm->ip4_frags_list_pool, frag_index);
+    }
+}
+
+/*
+ * Drain the per-reassembly fragment list of an IPv6 reassembly: every
+ * cached buffer index is appended to the vector *bi, the list element is
+ * returned to the pool and frag_n is decremented.  No locking is done
+ * here.
+ */
+static_always_inline void
+nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  for (;;)
+    {
+      dlist_elt_t *frag;
+      u32 frag_index =
+       clib_dlist_remove_head (rm->ip6_frags_list_pool,
+                               reass->frags_per_reass_list_head_index);
+
+      /* ~0 signals an empty list */
+      if (frag_index == ~0)
+       break;
+
+      frag = pool_elt_at_index (rm->ip6_frags_list_pool, frag_index);
+      vec_add1 (*bi, frag->value);
+      reass->frag_n--;
+      pool_put_index (rm->ip6_frags_list_pool, frag_index);
+    }
+}
+
+/**
+ * Set the virtual fragmentation reassembly configuration.
+ *
+ * @param timeout   Reassembly timeout.
+ * @param max_reass Maximum number of concurrent reassemblies.
+ * @param max_frag  Maximum number of fragments cached per reassembly.
+ * @param drop_frag Non-zero to drop fragments instead of translating them.
+ * @param is_ip6    Non-zero to configure IPv6, zero to configure IPv4.
+ *
+ * @returns 0 (no failure path exists).
+ *
+ * When max_reass changes, the reassembly pool and hash table of the
+ * selected address family are freed and re-created under that family's
+ * lock.  The IPv6 branch sizes them from the IPv6 settings.
+ *
+ * NOTE(review): the reassembly counter and the LRU / fragment list pools
+ * are not reset when the pool is re-created -- confirm whether stale
+ * state can be left behind.
+ */
+int
+nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
+              u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  u32 nbuckets;
+
+  if (is_ip6)
+    {
+      if (srm->ip6_max_reass != max_reass)
+       {
+         clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+         srm->ip6_max_reass = max_reass;
+         pool_free (srm->ip6_reass_pool);
+         /* size the IPv6 pool and hash from the IPv6 limit */
+         pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
+         nbuckets = nat_reass_get_nbuckets (1);
+         clib_bihash_free_48_8 (&srm->ip6_reass_hash);
+         clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
+                                nbuckets, nbuckets * 1024);
+
+         clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+       }
+      srm->ip6_timeout = timeout;
+      srm->ip6_max_frag = max_frag;
+      srm->ip6_drop_frag = drop_frag;
+    }
+  else
+    {
+      if (srm->ip4_max_reass != max_reass)
+       {
+         clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+         srm->ip4_max_reass = max_reass;
+         pool_free (srm->ip4_reass_pool);
+         pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
+         nbuckets = nat_reass_get_nbuckets (0);
+         clib_bihash_free_16_8 (&srm->ip4_reass_hash);
+         clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
+                                nbuckets, nbuckets * 1024);
+         clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+       }
+      srm->ip4_timeout = timeout;
+      srm->ip4_max_frag = max_frag;
+      srm->ip4_drop_frag = drop_frag;
+    }
+
+  return 0;
+}
+
+/* Return the configured reassembly timeout for IPv6 (is_ip6 != 0) or IPv4. */
+u32
+nat_reass_get_timeout (u8 is_ip6)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  return is_ip6 ? rm->ip6_timeout : rm->ip4_timeout;
+}
+
+/* Return the configured maximum number of reassemblies for IPv6
+ * (is_ip6 != 0) or IPv4. */
+u16
+nat_reass_get_max_reass (u8 is_ip6)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  return is_ip6 ? rm->ip6_max_reass : rm->ip4_max_reass;
+}
+
+/* Return the configured maximum number of cached fragments per
+ * reassembly for IPv6 (is_ip6 != 0) or IPv4. */
+u8
+nat_reass_get_max_frag (u8 is_ip6)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  return is_ip6 ? rm->ip6_max_frag : rm->ip4_max_frag;
+}
+
+/* Return the drop-fragments flag for IPv6 (is_ip6 != 0) or IPv4. */
+u8
+nat_reass_is_drop_frag (u8 is_ip6)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  return is_ip6 ? rm->ip6_drop_frag : rm->ip4_drop_frag;
+}
+
+/*
+ * Look up an IPv4 reassembly by key.  Returns the entry only when it is
+ * present in the hash and has been heard from within ip4_timeout of now;
+ * otherwise returns 0.  No locking is done here.
+ */
+static_always_inline nat_reass_ip4_t *
+nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+  clib_bihash_kv_16_8_t search, found;
+  nat_reass_ip4_t *entry;
+
+  search.key[0] = k->as_u64[0];
+  search.key[1] = k->as_u64[1];
+
+  /* non-zero result: key not found */
+  if (clib_bihash_search_16_8 (&rm->ip4_reass_hash, &search, &found))
+    return 0;
+
+  entry = pool_elt_at_index (rm->ip4_reass_pool, found.value);
+
+  /* expired entries are reported as absent */
+  return (now < entry->last_heard + (f64) rm->ip4_timeout) ? entry : 0;
+}
+
+/**
+ * Find the IPv4 reassembly for (src, dst, frag_id, proto), creating one
+ * when no live entry exists.
+ *
+ * @param src, dst      Addresses from the fragment's IPv4 header.
+ * @param frag_id       Fragment identification, stored as passed in.
+ * @param proto         L4 protocol.
+ * @param reset_timeout When non-zero and a live entry is found, refresh
+ *                      its last_heard time and move it to the LRU tail.
+ * @param bi_to_drop    Vector that receives the buffer indices cached in
+ *                      an expired entry that gets recycled.
+ *
+ * @returns The reassembly, or 0 when the table is full of live entries
+ *          or a hash operation fails.
+ *
+ * The whole operation runs under ip4_reass_lock (when initialised).
+ */
+nat_reass_ip4_t *
+nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
+                             u16 frag_id, u8 proto, u8 reset_timeout,
+                             u32 ** bi_to_drop)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  nat_reass_ip4_t *reass = 0;
+  nat_reass_ip4_key_t k;
+  f64 now = vlib_time_now (srm->vlib_main);
+  dlist_elt_t *oldest_elt, *elt;
+  dlist_elt_t *per_reass_list_head_elt;
+  u32 oldest_index, elt_index;
+  clib_bihash_kv_16_8_t kv;
+
+  k.src.as_u32 = src.as_u32;
+  k.dst.as_u32 = dst.as_u32;
+  k.frag_id = frag_id;
+  k.proto = proto;
+
+  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+  reass = nat_ip4_reass_lookup (&k, now);
+  if (reass)
+    {
+      if (reset_timeout)
+       {
+         reass->last_heard = now;
+         clib_dlist_remove (srm->ip4_reass_lru_list_pool,
+                            reass->lru_list_index);
+         clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                             srm->ip4_reass_head_index,
+                             reass->lru_list_index);
+       }
+      goto unlock;
+    }
+
+  if (srm->ip4_reass_n >= srm->ip4_max_reass)
+    {
+      /* table full: try to recycle the least recently used entry */
+      oldest_index =
+       clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
+                               srm->ip4_reass_head_index);
+      ASSERT (oldest_index != ~0);
+      oldest_elt =
+       pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
+      reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
+      if (now < reass->last_heard + (f64) srm->ip4_timeout)
+       {
+         /* even the oldest entry is still live: put it back and give up */
+         clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
+                             srm->ip4_reass_head_index, oldest_index);
+         clib_warning ("no free resassembly slot");
+         reass = 0;
+         goto unlock;
+       }
+
+      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                         srm->ip4_reass_head_index, oldest_index);
+
+      /* remove the evicted entry from the hash under its own key; the
+       * new key k is not in the table at this point */
+      kv.key[0] = reass->key.as_u64[0];
+      kv.key[1] = reass->key.as_u64[1];
+      if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
+       {
+         reass = 0;
+         goto unlock;
+       }
+
+      /* hand the evicted entry's cached fragments back for dropping */
+      nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
+    }
+  else
+    {
+      /* allocate a new entry, its LRU element and its fragment list head */
+      pool_get (srm->ip4_reass_pool, reass);
+      pool_get (srm->ip4_reass_lru_list_pool, elt);
+      reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
+      clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
+      elt->value = reass - srm->ip4_reass_pool;
+      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                         srm->ip4_reass_head_index, elt_index);
+      pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
+      reass->frags_per_reass_list_head_index =
+       per_reass_list_head_elt - srm->ip4_frags_list_pool;
+      clib_dlist_init (srm->ip4_frags_list_pool,
+                      reass->frags_per_reass_list_head_index);
+      srm->ip4_reass_n++;
+    }
+
+  /* (re)initialise the entry for the new key and publish it in the hash */
+  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
+  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
+  kv.value = reass - srm->ip4_reass_pool;
+  reass->sess_index = (u32) ~ 0;
+  reass->last_heard = now;
+
+  if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
+    {
+      reass = 0;
+      goto unlock;
+    }
+
+unlock:
+  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+  return reass;
+}
+
+/*
+ * Cache buffer index bi on the fragment list of an IPv4 reassembly.
+ * Returns -1 without caching when the reassembly already holds
+ * ip4_max_frag fragments (checked before the lock is taken), 0 on
+ * success.
+ */
+int
+nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+  dlist_elt_t *frag;
+  u32 frag_index;
+
+  if (reass->frag_n >= rm->ip4_max_frag)
+    return -1;
+
+  clib_spinlock_lock_if_init (&rm->ip4_reass_lock);
+
+  pool_get (rm->ip4_frags_list_pool, frag);
+  frag_index = frag - rm->ip4_frags_list_pool;
+  /* initialise the element before storing the value in it */
+  clib_dlist_init (rm->ip4_frags_list_pool, frag_index);
+  frag->value = bi;
+  clib_dlist_addtail (rm->ip4_frags_list_pool,
+                     reass->frags_per_reass_list_head_index, frag_index);
+  reass->frag_n++;
+
+  clib_spinlock_unlock_if_init (&rm->ip4_reass_lock);
+
+  return 0;
+}
+
+/*
+ * Locked wrapper around nat_ip4_reass_get_frags_inline(): move all
+ * buffer indices cached on the reassembly into the vector *bi.
+ */
+void
+nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  clib_spinlock_lock_if_init (&rm->ip4_reass_lock);
+  nat_ip4_reass_get_frags_inline (reass, bi);
+  clib_spinlock_unlock_if_init (&rm->ip4_reass_lock);
+}
+
+/*
+ * Call fn (reass, ctx) for every IPv4 reassembly in the pool that was
+ * last heard from within ip4_timeout of now.  The walk stops as soon as
+ * fn returns non-zero.  No lock is taken here.
+ */
+void
+nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
+{
+  nat_reass_ip4_t *reass;
+  nat_reass_main_t *srm = &nat_reass_main;
+  f64 now = vlib_time_now (srm->vlib_main);
+
+  /* *INDENT-OFF* */
+  pool_foreach (reass, srm->ip4_reass_pool,
+  ({
+    /* skip expired entries */
+    if (now < reass->last_heard + (f64) srm->ip4_timeout)
+      {
+        if (fn (reass, ctx))
+          return;
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Look up an IPv6 reassembly by key.  The key's unused field is zeroed
+ * before hashing.  Returns the entry only when it is present in the hash
+ * and has been heard from within ip6_timeout of now; otherwise returns
+ * 0.  No locking is done here.
+ */
+static_always_inline nat_reass_ip6_t *
+nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+  clib_bihash_kv_48_8_t search, found;
+  nat_reass_ip6_t *entry;
+  int i;
+
+  k->unused = 0;
+  for (i = 0; i < 6; i++)
+    search.key[i] = k->as_u64[i];
+
+  /* non-zero result: key not found */
+  if (clib_bihash_search_48_8 (&rm->ip6_reass_hash, &search, &found))
+    return 0;
+
+  entry = pool_elt_at_index (rm->ip6_reass_pool, found.value);
+
+  /* expired entries are reported as absent */
+  return (now < entry->last_heard + (f64) rm->ip6_timeout) ? entry : 0;
+}
+
+/**
+ * Find the IPv6 reassembly for (src, dst, frag_id, proto), creating one
+ * when no live entry exists.
+ *
+ * @param src, dst      Addresses from the fragment's IPv6 header.
+ * @param frag_id       Fragment identification, stored as passed in.
+ * @param proto         L4 protocol.
+ * @param reset_timeout When non-zero and a live entry is found, refresh
+ *                      its last_heard time and move it to the LRU tail.
+ * @param bi_to_drop    Vector that receives the buffer indices cached in
+ *                      an expired entry that gets recycled.
+ *
+ * @returns The reassembly, or 0 when the table is full of live entries
+ *          or a hash operation fails.
+ *
+ * The whole operation runs under ip6_reass_lock (when initialised).
+ */
+nat_reass_ip6_t *
+nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
+                             u32 frag_id, u8 proto, u8 reset_timeout,
+                             u32 ** bi_to_drop)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  nat_reass_ip6_t *reass = 0;
+  nat_reass_ip6_key_t k;
+  f64 now = vlib_time_now (srm->vlib_main);
+  dlist_elt_t *oldest_elt, *elt;
+  dlist_elt_t *per_reass_list_head_elt;
+  u32 oldest_index, elt_index;
+  clib_bihash_kv_48_8_t kv;
+
+  k.src.as_u64[0] = src.as_u64[0];
+  k.src.as_u64[1] = src.as_u64[1];
+  k.dst.as_u64[0] = dst.as_u64[0];
+  k.dst.as_u64[1] = dst.as_u64[1];
+  k.frag_id = frag_id;
+  k.proto = proto;
+  k.unused = 0;
+
+  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+  reass = nat_ip6_reass_lookup (&k, now);
+  if (reass)
+    {
+      if (reset_timeout)
+       {
+         reass->last_heard = now;
+         clib_dlist_remove (srm->ip6_reass_lru_list_pool,
+                            reass->lru_list_index);
+         clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                             srm->ip6_reass_head_index,
+                             reass->lru_list_index);
+       }
+      goto unlock;
+    }
+
+  if (srm->ip6_reass_n >= srm->ip6_max_reass)
+    {
+      /* table full: try to recycle the least recently used entry */
+      oldest_index =
+       clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
+                               srm->ip6_reass_head_index);
+      ASSERT (oldest_index != ~0);
+      /* the index came from the IPv6 LRU list, so resolve it there */
+      oldest_elt =
+       pool_elt_at_index (srm->ip6_reass_lru_list_pool, oldest_index);
+      reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
+      if (now < reass->last_heard + (f64) srm->ip6_timeout)
+       {
+         /* even the oldest entry is still live: put it back and give up */
+         clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
+                             srm->ip6_reass_head_index, oldest_index);
+         clib_warning ("no free resassembly slot");
+         reass = 0;
+         goto unlock;
+       }
+
+      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                         srm->ip6_reass_head_index, oldest_index);
+
+      /* remove the evicted entry from the hash under its own, complete
+       * 48-byte key; the new key k is not in the table at this point */
+      kv.key[0] = reass->key.as_u64[0];
+      kv.key[1] = reass->key.as_u64[1];
+      kv.key[2] = reass->key.as_u64[2];
+      kv.key[3] = reass->key.as_u64[3];
+      kv.key[4] = reass->key.as_u64[4];
+      kv.key[5] = reass->key.as_u64[5];
+      if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
+       {
+         reass = 0;
+         goto unlock;
+       }
+
+      /* hand the evicted entry's cached fragments back for dropping */
+      nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
+    }
+  else
+    {
+      /* allocate a new entry, its LRU element and its fragment list head */
+      pool_get (srm->ip6_reass_pool, reass);
+      pool_get (srm->ip6_reass_lru_list_pool, elt);
+      reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
+      clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
+      elt->value = reass - srm->ip6_reass_pool;
+      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                         srm->ip6_reass_head_index, elt_index);
+      pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
+      reass->frags_per_reass_list_head_index =
+       per_reass_list_head_elt - srm->ip6_frags_list_pool;
+      clib_dlist_init (srm->ip6_frags_list_pool,
+                      reass->frags_per_reass_list_head_index);
+      srm->ip6_reass_n++;
+    }
+
+  /* (re)initialise the entry for the new key and publish it in the hash */
+  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
+  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
+  reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
+  reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
+  reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
+  reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
+  kv.value = reass - srm->ip6_reass_pool;
+  reass->sess_index = (u32) ~ 0;
+  reass->last_heard = now;
+
+  if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
+    {
+      reass = 0;
+      goto unlock;
+    }
+
+unlock:
+  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+  return reass;
+}
+
+/*
+ * Cache buffer index bi on the fragment list of an IPv6 reassembly.
+ * Returns -1 without caching when the reassembly already holds
+ * ip6_max_frag fragments (checked before the lock is taken), 0 on
+ * success.
+ */
+int
+nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+  dlist_elt_t *frag;
+  u32 frag_index;
+
+  if (reass->frag_n >= rm->ip6_max_frag)
+    return -1;
+
+  clib_spinlock_lock_if_init (&rm->ip6_reass_lock);
+
+  pool_get (rm->ip6_frags_list_pool, frag);
+  frag_index = frag - rm->ip6_frags_list_pool;
+  /* initialise the element before storing the value in it */
+  clib_dlist_init (rm->ip6_frags_list_pool, frag_index);
+  frag->value = bi;
+  clib_dlist_addtail (rm->ip6_frags_list_pool,
+                     reass->frags_per_reass_list_head_index, frag_index);
+  reass->frag_n++;
+
+  clib_spinlock_unlock_if_init (&rm->ip6_reass_lock);
+
+  return 0;
+}
+
+/*
+ * Locked wrapper around nat_ip6_reass_get_frags_inline(): move all
+ * buffer indices cached on the reassembly into the vector *bi.
+ */
+void
+nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *rm = &nat_reass_main;
+
+  clib_spinlock_lock_if_init (&rm->ip6_reass_lock);
+  nat_ip6_reass_get_frags_inline (reass, bi);
+  clib_spinlock_unlock_if_init (&rm->ip6_reass_lock);
+}
+
+/*
+ * Call fn (reass, ctx) for every IPv6 reassembly in the pool that was
+ * last heard from within the IPv6 timeout of now.  The walk stops as
+ * soon as fn returns non-zero.  No lock is taken here.
+ */
+void
+nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
+{
+  nat_reass_ip6_t *reass;
+  nat_reass_main_t *srm = &nat_reass_main;
+  f64 now = vlib_time_now (srm->vlib_main);
+
+  /* *INDENT-OFF* */
+  pool_foreach (reass, srm->ip6_reass_pool,
+  ({
+    /* liveness of IPv6 entries is judged by the IPv6 timeout */
+    if (now < reass->last_heard + (f64) srm->ip6_timeout)
+      {
+        if (fn (reass, ctx))
+          return;
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Initialise the virtual-reassembly state for both address families:
+ * load the default settings, create the per-family lock when more than
+ * one vlib main exists, pre-allocate the reassembly pool, create the
+ * hash table and set up the LRU list head.  Always returns 0 (error is
+ * never set).
+ */
+clib_error_t *
+nat_reass_init (vlib_main_t * vm)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  clib_error_t *error = 0;
+  dlist_elt_t *head;
+  u32 nbuckets, head_index;
+
+  srm->vlib_main = vm;
+  srm->vnet_main = vnet_get_main ();
+
+  /* IPv4 */
+  srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
+  srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
+  srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
+  srm->ip4_drop_frag = 0;
+  srm->ip4_reass_n = 0;
+
+  /* the lock is only created when there is more than one vlib main */
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&srm->ip4_reass_lock);
+
+  pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
+
+  nbuckets = nat_reass_get_nbuckets (0);
+  clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
+                        nbuckets * 1024);
+
+  /* head element of the IPv4 LRU list */
+  pool_get (srm->ip4_reass_lru_list_pool, head);
+  srm->ip4_reass_head_index = head_index =
+    head - srm->ip4_reass_lru_list_pool;
+  clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
+
+  /* IPv6 */
+  srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
+  srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
+  srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
+  srm->ip6_drop_frag = 0;
+  srm->ip6_reass_n = 0;
+
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&srm->ip6_reass_lock);
+
+  pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
+
+  nbuckets = nat_reass_get_nbuckets (1);
+  clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
+                        nbuckets * 1024);
+
+  /* head element of the IPv6 LRU list */
+  pool_get (srm->ip6_reass_lru_list_pool, head);
+  srm->ip6_reass_head_index = head_index =
+    head - srm->ip6_reass_lru_list_pool;
+  clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
+
+  return error;
+}
+
+/*
+ * CLI handler for "nat virtual-reassembly".
+ *
+ * Parses max-reassemblies, max-fragments, timeout, enable|disable and
+ * ip4|ip6 from the command line.  Any numeric option left at 0 and an
+ * unspecified enable/disable keep the current value for the selected
+ * address family (IPv4 unless "ip6" is given).  The result is applied
+ * with nat_reass_set().
+ *
+ * max-reassemblies and max-fragments are parsed as 32-bit values but
+ * stored in 16-bit and 8-bit settings; values that do not fit are
+ * rejected instead of being silently truncated.
+ *
+ * Returns 0 on success (or when there is no line of input), otherwise a
+ * clib error describing the problem.
+ */
+static clib_error_t *
+nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                     vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 timeout = 0, max_reass = 0, max_frag = 0;
+  u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "max-reassemblies %u", &max_reass))
+       ;
+      else if (unformat (line_input, "max-fragments %u", &max_frag))
+       ;
+      else if (unformat (line_input, "timeout %u", &timeout))
+       ;
+      else if (unformat (line_input, "enable"))
+       drop_frag = 0;
+      else if (unformat (line_input, "disable"))
+       drop_frag = 1;
+      else if (unformat (line_input, "ip4"))
+       is_ip6 = 0;
+      else if (unformat (line_input, "ip6"))
+       is_ip6 = 1;
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
+    }
+
+  /* reject values that would wrap when narrowed for nat_reass_set() */
+  if (max_reass > 0xFFFF)
+    {
+      error = clib_error_return (0, "max-reassemblies %u out of range "
+                                "(maximum 65535)", max_reass);
+      goto done;
+    }
+  if (max_frag > 0xFF)
+    {
+      error = clib_error_return (0, "max-fragments %u out of range "
+                                "(maximum 255)", max_frag);
+      goto done;
+    }
+
+  /* options that were not given keep their current value */
+  if (!timeout)
+    timeout = nat_reass_get_timeout (is_ip6);
+  if (!max_reass)
+    max_reass = nat_reass_get_max_reass (is_ip6);
+  if (!max_frag)
+    max_frag = nat_reass_get_max_frag (is_ip6);
+  if (drop_frag == (u8) ~ 0)
+    drop_frag = nat_reass_is_drop_frag (is_ip6);
+
+  rv =
+    nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
+                  is_ip6);
+  if (rv)
+    {
+      error = clib_error_return (0, "nat_set_reass return %d", rv);
+      goto done;
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*
+ * Walk callback: print one IPv4 reassembly (addresses, protocol,
+ * fragment id and number of cached fragments) to the CLI.  ctx is the
+ * vlib main used for output.  Always returns 0, so the walk continues.
+ */
+static int
+nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
+{
+  vlib_main_t *vm = ctx;
+  u16 frag_id = clib_net_to_host_u16 (reass->key.frag_id);
+
+  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%04x cached %u",
+                  format_ip4_address, &reass->key.src,
+                  format_ip4_address, &reass->key.dst,
+                  reass->key.proto, frag_id, reass->frag_n);
+
+  return 0;
+}
+
+/*
+ * Walk callback: print one IPv6 reassembly (addresses, protocol,
+ * fragment id and number of cached fragments) to the CLI.  ctx is the
+ * vlib main used for output.  Always returns 0, so the walk continues.
+ */
+static int
+nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
+{
+  vlib_main_t *vm = ctx;
+  u32 frag_id = clib_net_to_host_u32 (reass->key.frag_id);
+
+  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%08x cached %u",
+                  format_ip6_address, &reass->key.src,
+                  format_ip6_address, &reass->key.dst,
+                  reass->key.proto, frag_id, reass->frag_n);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "show nat virtual-reassembly": print the settings and
+ * the live reassemblies, first for IPv4 and then for IPv6.  The setting
+ * labels match the keywords accepted by "nat virtual-reassembly".
+ * Always returns 0.
+ */
+static clib_error_t *
+show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
+                  nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
+  vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0));
+  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
+  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
+  vlib_cli_output (vm, " reassemblies:");
+  nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
+
+  vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
+                  nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
+  vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1));
+  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
+  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
+  vlib_cli_output (vm, " reassemblies:");
+  nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "nat virtual-reassembly" sets the reassembly
+ * configuration through nat_reass_command_fn. */
+VLIB_CLI_COMMAND (nat_reass_command, static) =
+{
+  .path = "nat virtual-reassembly",
+  .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
+                "[max-fragments <n>] [timeout <sec>] [enable|disable]",
+  .function = nat_reass_command_fn,
+};
+
+/* CLI registration: "show nat virtual-reassembly" prints the settings
+ * and live reassemblies through show_nat_reass_command_fn. */
+VLIB_CLI_COMMAND (show_nat_reass_command, static) =
+{
+  .path = "show nat virtual-reassembly",
+  .short_help = "show nat virtual-reassembly",
+  .function = show_nat_reass_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h
new file mode 100644 (file)
index 0000000..ae14a96
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin virtual fragmentation reassembly
+ */
+#ifndef __included_nat_reass_h__
+#define __included_nat_reass_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/dlist.h>
+
+/* Default reassembly timeout (presumably seconds, as in the CLI - confirm) */
+#define NAT_REASS_TIMEOUT_DEFAULT 2
+/* Default maximum number of concurrent reassemblies.
+ * NOTE(review): "DEAFULT" is a typo for "DEFAULT"; renaming requires
+ * updating every user of the macro. */
+#define NAT_MAX_REASS_DEAFULT 1024
+/* Default maximum number of fragments per reassembly */
+#define NAT_MAX_FRAG_DEFAULT 5
+/* Presumably the load factor used to size the reassembly hash tables -
+ * TODO confirm against nat_reass.c */
+#define NAT_REASS_HT_LOAD_FACTOR (0.75)
+
+/**
+ * IPv4 reassembly key: source address, destination address, fragment ID
+ * and protocol. The union overlays the four 32-bit fields with two 64-bit
+ * words (16 octets in total) so the key can be handled as as_u64[2].
+ */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ip4_address_t src;
+      ip4_address_t dst;
+      /* align by making this 4 octets even though it's a 2-octet field */
+      u32 frag_id;
+      /* align by making this 4 octets even though it's a 1-octet field */
+      u32 proto;
+    };
+    u64 as_u64[2];
+  };
+} nat_reass_ip4_key_t;
+
+/* *INDENT-OFF* */
+/**
+ * State of one IPv4 virtual reassembly (packed).
+ */
+typedef CLIB_PACKED(struct
+{
+  /* src/dst/frag_id/proto identifying this reassembly */
+  nat_reass_ip4_key_t key;
+  /* presumably index of this entry's element in the LRU list pool - confirm */
+  u32 lru_list_index;
+  /* NAT session index; NAT44 users compare it with (u32) ~0 to detect that
+   * no session has been resolved for the reassembly yet */
+  u32 sess_index;
+  /* presumably time a fragment was last seen (timeout handling) - confirm */
+  f64 last_heard;
+  /* presumably head of the list of cached fragments - confirm */
+  u32 frags_per_reass_list_head_index;
+  /* presumably number of currently cached fragments - confirm */
+  u8 frag_n;
+}) nat_reass_ip4_t;
+/* *INDENT-ON* */
+
+/**
+ * IPv6 reassembly key: source address, destination address, fragment ID
+ * and protocol, padded with an unused 64-bit word so that the whole key
+ * is six 64-bit words (48 octets), accessible as as_u64[6].
+ */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ip6_address_t src;
+      ip6_address_t dst;
+      u32 frag_id;
+      /* align by making this 4 octets even though it's a 1-octet field */
+      u32 proto;
+      /* padding up to 48 octets */
+      u64 unused;
+    };
+    u64 as_u64[6];
+  };
+} nat_reass_ip6_key_t;
+
+/* *INDENT-OFF* */
+/**
+ * State of one IPv6 virtual reassembly (packed); mirrors nat_reass_ip4_t
+ * field for field, with an IPv6 key.
+ */
+typedef CLIB_PACKED(struct
+{
+  /* src/dst/frag_id/proto identifying this reassembly */
+  nat_reass_ip6_key_t key;
+  /* presumably index of this entry's element in the LRU list pool - confirm */
+  u32 lru_list_index;
+  /* session index associated with the reassembly - semantics of the
+   * "unset" value to be confirmed against the NAT64 nodes */
+  u32 sess_index;
+  /* presumably time a fragment was last seen (timeout handling) - confirm */
+  f64 last_heard;
+  /* presumably head of the list of cached fragments - confirm */
+  u32 frags_per_reass_list_head_index;
+  /* presumably number of currently cached fragments - confirm */
+  u8 frag_n;
+}) nat_reass_ip6_t;
+/* *INDENT-ON* */
+
+/**
+ * Global state of NAT virtual fragmentation reassembly: per address family
+ * configuration (as set by nat_reass_set) and runtime data (reassembly
+ * pool, lookup hash, list pools and a spinlock).
+ */
+typedef struct
+{
+  /* IPv4 config */
+  u32 ip4_timeout;		/* reassembly timeout */
+  u16 ip4_max_reass;		/* maximum number of concurrent reassemblies */
+  u8 ip4_max_frag;		/* maximum number of fragments per reassembly */
+  u8 ip4_drop_frag;		/* non-zero: drop fragments, zero: translate */
+
+  /* IPv6 config */
+  u32 ip6_timeout;		/* reassembly timeout */
+  u16 ip6_max_reass;		/* maximum number of concurrent reassemblies */
+  u8 ip6_max_frag;		/* maximum number of fragments per reassembly */
+  u8 ip6_drop_frag;		/* non-zero: drop fragments, zero: translate */
+
+  /* IPv4 runtime */
+  nat_reass_ip4_t *ip4_reass_pool;
+  clib_bihash_16_8_t ip4_reass_hash;
+  dlist_elt_t *ip4_reass_lru_list_pool;
+  dlist_elt_t *ip4_frags_list_pool;
+  u32 ip4_reass_head_index;
+  /* presumably the current number of IPv4 reassemblies - confirm */
+  u16 ip4_reass_n;
+  clib_spinlock_t ip4_reass_lock;
+
+  /* IPv6 runtime */
+  nat_reass_ip6_t *ip6_reass_pool;
+  clib_bihash_48_8_t ip6_reass_hash;
+  dlist_elt_t *ip6_reass_lru_list_pool;
+  dlist_elt_t *ip6_frags_list_pool;
+  u32 ip6_reass_head_index;
+  /* presumably the current number of IPv6 reassemblies - confirm */
+  u16 ip6_reass_n;
+  clib_spinlock_t ip6_reass_lock;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} nat_reass_main_t;
+
+/**
+ * @brief Set NAT virtual fragmentation reassembly configuration.
+ *
+ * @param timeout   Reassembly timeout.
+ * @param max_reass Maximum number of concurrent reassemblies.
+ * @param max_frag  Maximum number of fragments per reassembly.
+ * @param drop_frag If zero translate fragments, otherwise drop fragments.
+ * @param is_ip6    1 if IPv6, 0 if IPv4.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
+                  u8 is_ip6);
+
+/**
+ * @brief Get reassembly timeout.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns reassembly timeout.
+ */
+u32 nat_reass_get_timeout (u8 is_ip6);
+
+/**
+ * @brief Get maximum number of concurrent reassemblies.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns maximum number of concurrent reassemblies.
+ */
+u16 nat_reass_get_max_reass (u8 is_ip6);
+
+/**
+ * @brief Get maximum number of fragments per reassembly.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns maximum number of fragments per reassembly.
+ */
+u8 nat_reass_get_max_frag (u8 is_ip6);
+
+/**
+ * @brief Get status of virtual fragmentation reassembly.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns zero if translate fragments, non-zero value if drop fragments.
+ */
+u8 nat_reass_is_drop_frag (u8 is_ip6);
+
+/**
+ * @brief Initialize NAT virtual fragmentation reassembly.
+ *
+ * @param vm vlib main.
+ *
+ * @return error code.
+ */
+clib_error_t *nat_reass_init (vlib_main_t * vm);
+
+/**
+ * @brief Find or create reassembly.
+ *
+ * @param src Source IPv4 address.
+ * @param dst Destination IPv4 address.
+ * @param frag_id Fragment ID.
+ * @param proto L4 protocol.
+ * @param reset_timeout If non-zero value reset timeout.
+ * @param bi_to_drop Fragments to drop.
+ *
+ * @returns Reassembly data or 0 on failure.
+ */
+nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src,
+                                              ip4_address_t dst,
+                                              u16 frag_id, u8 proto,
+                                              u8 reset_timeout,
+                                              u32 ** bi_to_drop);
+/**
+ * @brief Cache fragment.
+ *
+ * @param reass Reassembly data.
+ * @param bi Buffer index.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi);
+
+/**
+ * @brief Get cached fragments.
+ *
+ * @param reass Reassembly data.
+ * @param bi Vector of buffer indexes.
+ */
+void nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi);
+
+/**
+ * @brief Callback invoked for each IPv4 reassembly during a walk; a non-zero
+ * return value stops the walk.
+ */
+typedef int (*nat_ip4_reass_walk_fn_t) (nat_reass_ip4_t * reass, void *ctx);
+
+/**
+ * @brief Walk IPv4 reassemblies.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Find or create reassembly.
+ *
+ * @param src Source IPv6 address.
+ * @param dst Destination IPv6 address.
+ * @param frag_id Fragment ID.
+ * @param proto L4 protocol.
+ * @param reset_timeout If non-zero value reset timeout.
+ * @param bi_to_drop Fragments to drop.
+ *
+ * @returns Reassembly data or 0 on failure.
+ */
+nat_reass_ip6_t *nat_ip6_reass_find_or_create (ip6_address_t src,
+                                              ip6_address_t dst,
+                                              u32 frag_id, u8 proto,
+                                              u8 reset_timeout,
+                                              u32 ** bi_to_drop);
+/**
+ * @brief Cache fragment.
+ *
+ * @param reass Reassembly data.
+ * @param bi Buffer index.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi);
+
+/**
+ * @brief Get cached fragments.
+ *
+ * @param reass Reassembly data.
+ * @param bi Vector of buffer indexes.
+ */
+void nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi);
+
+/**
+ * @brief Callback invoked for each IPv6 reassembly during a walk; a non-zero
+ * return value stops the walk.
+ */
+typedef int (*nat_ip6_reass_walk_fn_t) (nat_reass_ip6_t * reass, void *ctx);
+
+/**
+ * @brief Walk IPv6 reassemblies.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx);
+
+#endif /* __included_nat_reass_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index f250136..489afad 100755 (executable)
@@ -25,6 +25,7 @@
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
+#include <nat/nat_reass.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -78,17 +79,40 @@ static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
   return s;
 }
 
   return s;
 }
 
+/* Per-packet trace record of the nat44-out2in-reass node */
+typedef struct {
+  u32 sw_if_index;   /* RX interface of the traced buffer */
+  u32 next_index;    /* next node index chosen for the buffer */
+  u8 cached;         /* non-zero if the fragment was cached, not translated */
+} nat44_out2in_reass_trace_t;
+
+/*
+ * Format one nat44-out2in-reass trace record: RX sw_if_index, chosen next
+ * index and whether the fragment was cached or translated.
+ * Arguments consumed from args: vlib_main_t *, vlib_node_t *,
+ * nat44_out2in_reass_trace_t *.
+ */
+static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat44_out2in_reass_trace_t * trace;
+  const char * status;
+
+  trace = va_arg (*args, nat44_out2in_reass_trace_t *);
+  status = trace->cached ? "cached" : "translated";
+
+  return format (s,
+                 "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
+                 trace->sw_if_index, trace->next_index, status);
+}
+
 vlib_node_registration_t snat_out2in_node;
 vlib_node_registration_t snat_out2in_fast_node;
 vlib_node_registration_t snat_out2in_worker_handoff_node;
 vlib_node_registration_t snat_det_out2in_node;
 vlib_node_registration_t snat_out2in_node;
 vlib_node_registration_t snat_out2in_fast_node;
 vlib_node_registration_t snat_out2in_worker_handoff_node;
 vlib_node_registration_t snat_det_out2in_node;
+vlib_node_registration_t nat44_out2in_reass_node;
 
 #define foreach_snat_out2in_error                       \
 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
 _(OUT2IN_PACKETS, "Good out2in packets processed")      \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
 
 #define foreach_snat_out2in_error                       \
 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
 _(OUT2IN_PACKETS, "Good out2in packets processed")      \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
-_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "Drop fragment")                       \
+_(MAX_REASS, "Maximum reassemblies exceeded")           \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 typedef enum {
 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
 
 typedef enum {
 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
@@ -107,6 +131,7 @@ typedef enum {
   SNAT_OUT2IN_NEXT_DROP,
   SNAT_OUT2IN_NEXT_LOOKUP,
   SNAT_OUT2IN_NEXT_ICMP_ERROR,
   SNAT_OUT2IN_NEXT_DROP,
   SNAT_OUT2IN_NEXT_LOOKUP,
   SNAT_OUT2IN_NEXT_ICMP_ERROR,
+  SNAT_OUT2IN_NEXT_REASS,
   SNAT_OUT2IN_N_NEXT,
 } snat_out2in_next_t;
 
   SNAT_OUT2IN_N_NEXT,
 } snat_out2in_next_t;
 
@@ -139,6 +164,7 @@ create_session_for_static_mapping (snat_main_t *sm,
   dlist_elt_t * per_user_translation_list_elt;
   dlist_elt_t * per_user_list_head_elt;
   ip4_header_t *ip0;
   dlist_elt_t * per_user_translation_list_elt;
   dlist_elt_t * per_user_list_head_elt;
   ip4_header_t *ip0;
+  udp_header_t *udp0;
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
@@ -147,6 +173,7 @@ create_session_for_static_mapping (snat_main_t *sm,
     }
 
   ip0 = vlib_buffer_get_current (b0);
     }
 
   ip0 = vlib_buffer_get_current (b0);
+  udp0 = ip4_next_header (ip0);
 
   user_key.addr = in2out.addr;
   user_key.fib_index = in2out.fib_index;
 
   user_key.addr = in2out.addr;
   user_key.fib_index = in2out.fib_index;
@@ -188,7 +215,8 @@ create_session_for_static_mapping (snat_main_t *sm,
 
   s->outside_address_index = ~0;
   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
 
   s->outside_address_index = ~0;
   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
-  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+  s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
+  s->ext_host_port = udp0->src_port;
   u->nstaticsessions++;
 
   /* Create list elts */
   u->nstaticsessions++;
 
   /* Create list elts */
@@ -1033,6 +1061,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace0;
             }
 
               goto trace0;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip0)))
+            {
+              next0 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace0;
+            }
+
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
@@ -1188,6 +1222,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace1;
             }
 
               goto trace1;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip1)))
+            {
+              next1 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace1;
+            }
+
           key1.addr = ip1->dst_address;
           key1.port = udp1->dst_port;
           key1.protocol = proto1;
           key1.addr = ip1->dst_address;
           key1.port = udp1->dst_port;
           key1.protocol = proto1;
@@ -1379,6 +1419,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace00;
             }
 
               goto trace00;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip0)))
+            {
+              next0 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace00;
+            }
+
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
@@ -1530,10 +1576,294 @@ VLIB_REGISTER_NODE (snat_out2in_node) = {
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
 
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
 
+/*
+ * nat44-out2in-reass node function: out2in translation of IPv4 fragments
+ * (packets are steered here via SNAT_OUT2IN_NEXT_REASS).
+ *
+ * Per fragment:
+ *  - if fragment dropping is configured, drop (DROP_FRAGMENT);
+ *  - find or create the reassembly for (src, dst, fragment id, protocol),
+ *    drop with MAX_REASS if that fails;
+ *  - first fragment: resolve the session from the per-thread out2in hash or
+ *    a static mapping, store its index in the reassembly and collect the
+ *    fragments cached so far into fragments_to_loopback;
+ *  - other fragments: if the reassembly has no session yet, cache the
+ *    fragment (drop with MAX_FRAG if caching fails), otherwise use the
+ *    stored session;
+ *  - rewrite the destination address (and, for the first fragment, the
+ *    destination port) and update checksums and session accounting.
+ *
+ * Once the input frame is exhausted, pending loopback fragments are copied
+ * into the frame's vector and processed by the same loop.
+ *
+ * Returns frame->n_vectors.
+ */
+static uword
+nat44_out2in_reass_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  snat_out2in_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t *sm = &snat_main;
+  f64 now = vlib_time_now (vm);
+  u32 thread_index = vlib_get_thread_index ();
+  snat_main_per_thread_data_t *per_thread_data =
+    &sm->per_thread_data[thread_index];
+  /* buffer indices handed back by the reassembly code for dropping */
+  u32 *fragments_to_drop = 0;
+  /* previously cached fragments to be re-run through this node */
+  u32 *fragments_to_loopback = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
+         vlib_buffer_t *b0;
+          u32 next0;
+          u8 cached0 = 0;
+          ip4_header_t *ip0;
+          nat_reass_ip4_t *reass0;
+          udp_header_t * udp0;
+          tcp_header_t * tcp0;
+          snat_session_key_t key0, sm0;
+          clib_bihash_kv_8_8_t kv0, value0;
+          snat_session_t * s0 = 0;
+          u16 old_port0, new_port0;
+          ip_csum_t sum0;
+
+          /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                               sw_if_index0);
+
+          /* IPv4 virtual reassembly disabled: drop every fragment */
+          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
+            {
+              next0 = SNAT_OUT2IN_NEXT_DROP;
+              b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
+              goto trace0;
+            }
+
+          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
+          udp0 = ip4_next_header (ip0);
+          tcp0 = (tcp_header_t *) udp0;
+          proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+          /* reset_timeout is 1; fragments the reassembly code wants dropped
+             are appended to fragments_to_drop */
+          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
+                                                 ip0->dst_address,
+                                                 ip0->fragment_id,
+                                                 ip0->protocol,
+                                                 1,
+                                                 &fragments_to_drop);
+
+          if (PREDICT_FALSE (!reass0))
+            {
+              next0 = SNAT_OUT2IN_NEXT_DROP;
+              b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
+              goto trace0;
+            }
+
+          /* the L4 ports are only read from the first fragment */
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              key0.addr = ip0->dst_address;
+              key0.port = udp0->dst_port;
+              key0.protocol = proto0;
+              key0.fib_index = rx_fib_index0;
+              kv0.key = key0.as_u64;
+
+              if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
+                {
+                  /* Try to match static mapping by external address and port,
+                     destination address and port in packet */
+                  if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+                      /*
+                       * Send DHCP packets to the ipv4 stack, or we won't
+                       * be able to use dhcp client on the outside interface
+                       */
+                      if (proto0 != SNAT_PROTOCOL_UDP
+                          || (udp0->dst_port
+                              != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
+
+                        next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+
+                  /* Create session initiated by host from external network */
+                  s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+                                                         thread_index);
+                  if (!s0)
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+                      next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+                  reass0->sess_index = s0 - per_thread_data->sessions;
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (per_thread_data->sessions,
+                                          value0.value);
+                  reass0->sess_index = value0.value;
+                }
+              /* session known now: fetch fragments cached before the first
+                 fragment arrived so they can be translated as well */
+              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
+            }
+          else
+            {
+              /* no session yet (first fragment not seen): cache fragment */
+              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
+                {
+                  if (nat_ip4_reass_add_fragment (reass0, bi0))
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
+                      next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+                  cached0 = 1;
+                  goto trace0;
+                }
+              s0 = pool_elt_at_index (per_thread_data->sessions,
+                                      reass0->sess_index);
+            }
+
+          /* translate destination address and fix the IPv4 header checksum */
+          old_addr0 = ip0->dst_address.as_u32;
+          ip0->dst_address = s0->in2out.addr;
+          new_addr0 = ip0->dst_address.as_u32;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                 ip4_header_t,
+                                 dst_address /* changed member */);
+          ip0->checksum = ip_csum_fold (sum0);
+
+          /* only the first fragment has its L4 header rewritten */
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+                {
+                  old_port0 = tcp0->dst_port;
+                  tcp0->dst_port = s0->in2out.port;
+                  new_port0 = tcp0->dst_port;
+
+                  sum0 = tcp0->checksum;
+                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                         ip4_header_t,
+                                         dst_address /* changed member */);
+
+                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
+                                         ip4_header_t /* cheat */,
+                                         length /* changed member */);
+                  tcp0->checksum = ip_csum_fold(sum0);
+                }
+              else
+                {
+                  /* every non-TCP protocol is handled through the UDP
+                     header layout; the checksum is zeroed, not updated */
+                  old_port0 = udp0->dst_port;
+                  udp0->dst_port = s0->in2out.port;
+                  udp0->checksum = 0;
+                }
+            }
+
+          /* Accounting */
+          s0->last_heard = now;
+          s0->total_pkts++;
+          s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+          /* Per-user LRU list maintenance for dynamic translation */
+          if (!snat_is_session_static (s0))
+            {
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+                                 s0->per_user_index);
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+                                  s0->per_user_list_head_index,
+                                  s0->per_user_index);
+            }
+
+        trace0:
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat44_out2in_reass_trace_t *t =
+                 vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->cached = cached0;
+              t->sw_if_index = sw_if_index0;
+              t->next_index = next0;
+            }
+
+          if (cached0)
+            {
+              /* fragment was cached: undo the speculative enqueue */
+              n_left_to_next++;
+              to_next--;
+            }
+          else
+            {
+              pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+              /* verify speculative enqueue, maybe switch current next frame */
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               bi0, next0);
+            }
+
+         /* input exhausted: refill the frame's vector with pending loopback
+            fragments (at most VLIB_FRAME_SIZE at a time, taken from the
+            tail of the vector) and keep looping */
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
+                               sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
+                               SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+                               pkts_processed);
+
+  /* hand the fragments collected in fragments_to_drop to the drop next
+     node with the DROP_FRAGMENT error */
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                        &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
+                        SNAT_OUT2IN_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "nat44-out2in-reass" internal node. It reuses the
+ * out2in error strings and the snat_out2in_next_t next-node layout, so it
+ * also lists itself under SNAT_OUT2IN_NEXT_REASS.
+ */
+VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
+  .function = nat44_out2in_reass_node_fn,
+  .name = "nat44-out2in-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat44_out2in_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+  .error_strings = snat_out2in_error_strings,
+
+  .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+    [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+    [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
+  },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
+                              nat44_out2in_reass_node_fn);
+
 /**************************/
 /*** deterministic mode ***/
 /**************************/
 /**************************/
 /*** deterministic mode ***/
 /**************************/
@@ -2017,6 +2347,7 @@ VLIB_REGISTER_NODE (snat_det_out2in_node) = {
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
@@ -2509,6 +2840,7 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);
index e420baf..3c002bb 100644 (file)
@@ -3,16 +3,19 @@
 import socket
 import unittest
 import struct
 import socket
 import unittest
 import struct
+import StringIO
+import random
 
 from framework import VppTestCase, VppTestRunner, running_extended_tests
 from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 from scapy.layers.inet import IP, TCP, UDP, ICMP
 from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror
 from scapy.layers.inet6 import IPv6, ICMPv6EchoRequest, ICMPv6EchoReply
 
 from framework import VppTestCase, VppTestRunner, running_extended_tests
 from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 from scapy.layers.inet import IP, TCP, UDP, ICMP
 from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror
 from scapy.layers.inet6 import IPv6, ICMPv6EchoRequest, ICMPv6EchoReply
-from scapy.layers.inet6 import ICMPv6DestUnreach, IPerror6
+from scapy.layers.inet6 import ICMPv6DestUnreach, IPerror6, IPv6ExtHdrFragment
 from scapy.layers.l2 import Ether, ARP, GRE
 from scapy.data import IP_PROTOS
 from scapy.layers.l2 import Ether, ARP, GRE
 from scapy.data import IP_PROTOS
-from scapy.packet import bind_layers
+from scapy.packet import bind_layers, Raw
+from scapy.all import fragment6
 from util import ppp
 from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder
 from time import sleep
 from util import ppp
 from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder
 from time import sleep
@@ -464,6 +467,121 @@ class MethodHolder(VppTestCase):
                                       "(inside network):", packet))
                 raise
 
                                       "(inside network):", packet))
                 raise
 
+    def create_stream_frag(self, src_if, dst, sport, dport, data):
+        """
+        Build a TCP packet split into three IPv4 fragments
+
+        :param src_if: Source interface
+        :param dst: Destination IPv4 address
+        :param sport: Source TCP port
+        :param dport: Destination TCP port
+        :param data: Payload data
+        :returns: List of the three fragments
+        """
+        frag_id = random.randint(0, 65535)
+
+        # Build the whole (unfragmented) packet and re-parse it from its
+        # serialized form so that the TCP checksum can be read back.
+        whole = (IP(src=src_if.remote_ip4, dst=dst) /
+                 TCP(sport=sport, dport=dport) /
+                 Raw(data))
+        whole = whole.__class__(str(whole))
+        tcp_chksum = whole['TCP'].chksum
+
+        # First fragment: TCP header plus the first 4 bytes of payload.
+        first = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                 IP(src=src_if.remote_ip4, dst=dst, flags="MF", frag=0,
+                    id=frag_id) /
+                 TCP(sport=sport, dport=dport, chksum=tcp_chksum) /
+                 Raw(data[0:4]))
+        # Middle fragment: payload bytes 4..19, fragment offset 3.
+        middle = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                  IP(src=src_if.remote_ip4, dst=dst, flags="MF", frag=3,
+                     id=frag_id, proto=IP_PROTOS.tcp) /
+                  Raw(data[4:20]))
+        # Last fragment: remaining payload, fragment offset 5, no MF flag.
+        last = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                IP(src=src_if.remote_ip4, dst=dst, frag=5,
+                   proto=IP_PROTOS.tcp, id=frag_id) /
+                Raw(data[20:]))
+        return [first, middle, last]
+
+    def create_stream_frag_ip6(self, src_if, dst, sport, dport, data,
+                               pref=None, plen=0, frag_size=128):
+        """
+        Build a TCP packet split into IPv6 fragments
+
+        :param src_if: Source interface
+        :param dst: Destination IPv4 address (embedded in the IPv6 address)
+        :param sport: Source TCP port
+        :param dport: Destination TCP port
+        :param data: Payload data
+        :param pref: NAT64 prefix (64:ff9b:: is used when None)
+        :param plen: NAT64 prefix length
+        :param frag_size: size of fragments
+        :returns: Fragments
+        """
+        dst_ip6 = ('64:ff9b::' + dst if pref is None
+                   else self.compose_ip6(dst, pref, plen))
+
+        ether = Ether(dst=src_if.local_mac, src=src_if.remote_mac)
+        ip6 = IPv6(src=src_if.remote_ip6, dst=dst_ip6)
+        frag_hdr = IPv6ExtHdrFragment(id=random.randint(0, 65535))
+        tcp = TCP(sport=sport, dport=dport)
+        whole = ether / ip6 / frag_hdr / tcp / Raw(data)
+
+        return fragment6(whole, frag_size)
+
+    def reass_frags_and_verify(self, frags, src, dst):
+        """
+        Reassemble and verify fragmented packet
+
+        Every fragment must carry the expected source/destination address
+        and a valid IP checksum. Fragment payloads are written at their
+        fragment offset (in 8-byte units) and the result is parsed as TCP
+        (with checksum verification) or UDP, depending on the protocol of
+        the first fragment.
+
+        :param frags: Captured fragments
+        :param src: Source IPv4 address to verify
+        :param dst: Destination IPv4 address to verify
+
+        :returns: Reassembled IPv4 packet
+        """
+        reassembled = StringIO.StringIO()
+        for p in frags:
+            self.assertEqual(p[IP].src, src)
+            self.assertEqual(p[IP].dst, dst)
+            self.check_ip_checksum(p)
+            reassembled.seek(p[IP].frag * 8)
+            reassembled.write(p[IP].payload)
+        ip = IP(src=frags[0][IP].src, dst=frags[0][IP].dst,
+                proto=frags[0][IP].proto)
+        # NOTE: for any other protocol p is left as the last fragment
+        # visited by the loop above and returned unchanged.
+        if ip.proto == IP_PROTOS.tcp:
+            p = (ip / TCP(reassembled.getvalue()))
+            self.check_tcp_checksum(p)
+        elif ip.proto == IP_PROTOS.udp:
+            p = (ip / UDP(reassembled.getvalue()))
+        return p
+
+    def reass_frags_and_verify_ip6(self, frags, src, dst):
+        """
+        Reassemble and verify fragmented IPv6 packet
+
+        Each fragment's payload is placed at its fragment offset (8-byte
+        units); the result is parsed as TCP (checksum verified) or UDP
+        according to the next-header of the first fragment.
+
+        :param frags: Captured fragments
+        :param src: Source IPv6 address to verify
+        :param dst: Destination IPv6 address to verify
+
+        :returns: Reassembled IPv6 packet
+        """
+        reassembled = StringIO.StringIO()
+        for pkt in frags:
+            self.assertEqual(pkt[IPv6].src, src)
+            self.assertEqual(pkt[IPv6].dst, dst)
+            frag_hdr = pkt[IPv6ExtHdrFragment]
+            reassembled.seek(frag_hdr.offset * 8)
+            reassembled.write(frag_hdr.payload)
+
+        first = frags[0]
+        ip = IPv6(src=first[IPv6].src, dst=first[IPv6].dst,
+                  nh=first[IPv6ExtHdrFragment].nh)
+        # pkt is deliberately reused: for any other next-header the last
+        # fragment from the loop is returned as is.
+        if ip.nh == IP_PROTOS.tcp:
+            pkt = (ip / TCP(reassembled.getvalue()))
+            self.check_tcp_checksum(pkt)
+        elif ip.nh == IP_PROTOS.udp:
+            pkt = (ip / UDP(reassembled.getvalue()))
+        return pkt
+
     def verify_ipfix_nat44_ses(self, data):
         """
         Verify IPFIX NAT44 session create/delete event
     def verify_ipfix_nat44_ses(self, data):
         """
         Verify IPFIX NAT44 session create/delete event
@@ -586,6 +704,8 @@ class TestNAT44(MethodHolder):
             cls.pg4._remote_ip4 = cls.pg9._remote_hosts[0]._ip4 = "10.0.0.2"
             cls.pg9.resolve_arp()
 
             cls.pg4._remote_ip4 = cls.pg9._remote_hosts[0]._ip4 = "10.0.0.2"
             cls.pg9.resolve_arp()
 
+            random.seed()
+
         except Exception:
             super(TestNAT44, cls).tearDownClass()
             raise
         except Exception:
             super(TestNAT44, cls).tearDownClass()
             raise
@@ -671,6 +791,9 @@ class TestNAT44(MethodHolder):
                                                   addr.ip_address,
                                                   is_add=0)
 
                                                   addr.ip_address,
                                                   is_add=0)
 
+        self.vapi.nat_set_reass()
+        self.vapi.nat_set_reass(is_ip6=1)
+
     def nat44_add_static_mapping(self, local_ip, external_ip='0.0.0.0',
                                  local_port=0, external_port=0, vrf_id=0,
                                  is_add=1, external_sw_if_index=0xFFFFFFFF,
     def nat44_add_static_mapping(self, local_ip, external_ip='0.0.0.0',
                                  local_port=0, external_port=0, vrf_id=0,
                                  is_add=1, external_sw_if_index=0xFFFFFFFF,
@@ -2480,10 +2603,164 @@ class TestNAT44(MethodHolder):
         sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, 0)
         self.assertEqual(nsessions - len(sessions), 2)
 
         sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, 0)
         self.assertEqual(nsessions - len(sessions), 2)
 
+    def test_set_get_reass(self):
+        """ NAT44 set/get virtual fragmentation reassembly """
+        initial = self.vapi.nat_get_reass()
+
+        # new IPv4 settings, derived from whatever is configured right now
+        new_timeout = initial.ip4_timeout + 5
+        new_max_reass = initial.ip4_max_reass * 2
+        new_max_frag = initial.ip4_max_frag * 2
+
+        self.vapi.nat_set_reass(timeout=new_timeout,
+                                max_reass=new_max_reass,
+                                max_frag=new_max_frag)
+
+        # the getter has to report exactly what was just written
+        updated = self.vapi.nat_get_reass()
+        self.assertEqual(new_timeout, updated.ip4_timeout)
+        self.assertEqual(new_max_reass, updated.ip4_max_reass)
+        self.assertEqual(new_max_frag, updated.ip4_max_frag)
+
+        # enabling fragment dropping must be visible through the getter too
+        self.vapi.nat_set_reass(drop_frag=1)
+        drop_cfg = self.vapi.nat_get_reass()
+        self.assertTrue(drop_cfg.ip4_drop_frag)
+
+    def test_frag_in_order(self):
+        """ NAT44 translate fragments arriving in order """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+
+        payload = "A" * 4 + "B" * 16 + "C" * 3
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        # number of reassemblies known before any traffic is sent
+        reass_before = len(self.vapi.nat_reass_dump())
+
+        # in2out
+        in_stream = self.create_stream_frag(self.pg0, self.pg1.remote_ip4,
+                                            self.tcp_port_in, 20, payload)
+        self.pg0.add_stream(in_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg1.get_capture(len(in_stream))
+        translated = self.reass_frags_and_verify(captured, self.nat_addr,
+                                                 self.pg1.remote_ip4)
+        self.assertEqual(translated[TCP].dport, 20)
+        self.assertNotEqual(translated[TCP].sport, self.tcp_port_in)
+        # the outside port chosen by NAT is the target of the reply below
+        self.tcp_port_out = translated[TCP].sport
+        self.assertEqual(payload, translated[Raw].load)
+
+        # out2in
+        out_stream = self.create_stream_frag(self.pg1, self.nat_addr, 20,
+                                             self.tcp_port_out, payload)
+        self.pg1.add_stream(out_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg0.get_capture(len(out_stream))
+        translated = self.reass_frags_and_verify(captured,
+                                                 self.pg1.remote_ip4,
+                                                 self.pg0.remote_ip4)
+        self.assertEqual(translated[TCP].sport, 20)
+        self.assertEqual(translated[TCP].dport, self.tcp_port_in)
+        self.assertEqual(payload, translated[Raw].load)
+
+        # one reassembly per direction is expected to have been added
+        reass_after = len(self.vapi.nat_reass_dump())
+        self.assertEqual(reass_after - reass_before, 2)
+
+    def test_reass_hairpinning(self):
+        """ NAT44 fragments hairpinning """
+        server = self.pg0.remote_hosts[1]
+        host_in_port = random.randint(1025, 65535)
+        server_in_port = random.randint(1025, 65535)
+        server_out_port = random.randint(1025, 65535)
+        data = "A" * 4 + "B" * 16 + "C" * 3
+
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+        # add static mapping for server
+        self.nat44_add_static_mapping(server.ip4, self.nat_addr,
+                                      server_in_port, server_out_port,
+                                      proto=IP_PROTOS.tcp)
+
+        # send packet from host to server
+        pkts = self.create_stream_frag(self.pg0,
+                                       self.nat_addr,
+                                       host_in_port,
+                                       server_out_port,
+                                       data)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # hairpinned traffic leaves through the interface it arrived on
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        server.ip4)
+        self.assertNotEqual(p[TCP].sport, host_in_port)
+        self.assertEqual(p[TCP].dport, server_in_port)
+        self.assertEqual(data, p[Raw].load)
+
+    def test_frag_out_of_order(self):
+        """ NAT44 translate fragments arriving out of order """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+
+        data = "A" * 4 + "B" * 16 + "C" * 3
+        # Store the random inside port; the stream below and the final
+        # out2in assertion both read self.tcp_port_in.
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        # in2out
+        pkts = self.create_stream_frag(self.pg0,
+                                       self.pg1.remote_ip4,
+                                       self.tcp_port_in,
+                                       20,
+                                       data)
+        # send the fragments in reversed order
+        pkts.reverse()
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg1.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        self.pg1.remote_ip4)
+        self.assertEqual(p[TCP].dport, 20)
+        self.assertNotEqual(p[TCP].sport, self.tcp_port_in)
+        self.tcp_port_out = p[TCP].sport
+        self.assertEqual(data, p[Raw].load)
+
+        # out2in
+        pkts = self.create_stream_frag(self.pg1,
+                                       self.nat_addr,
+                                       20,
+                                       self.tcp_port_out,
+                                       data)
+        # send the fragments in reversed order
+        pkts.reverse()
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.pg1.remote_ip4,
+                                        self.pg0.remote_ip4)
+        self.assertEqual(p[TCP].sport, 20)
+        self.assertEqual(p[TCP].dport, self.tcp_port_in)
+        self.assertEqual(data, p[Raw].load)
+
     def tearDown(self):
         super(TestNAT44, self).tearDown()
         if not self.vpp_dead:
             self.logger.info(self.vapi.cli("show nat44 verbose"))
     def tearDown(self):
         super(TestNAT44, self).tearDown()
         if not self.vpp_dead:
             self.logger.info(self.vapi.cli("show nat44 verbose"))
+            self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
             self.clear_nat44()
 
 
             self.clear_nat44()
 
 
@@ -3928,6 +4205,138 @@ class TestNAT64(MethodHolder):
             self.logger.error(ppp("Unexpected or invalid packet:", p))
             raise
 
             self.logger.error(ppp("Unexpected or invalid packet:", p))
             raise
 
+    def test_frag_in_order(self):
+        """ NAT64 translate fragments arriving in order """
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        # number of reassemblies known before any traffic is sent
+        reass_before = len(self.vapi.nat_reass_dump())
+
+        # in2out
+        payload = 'a' * 200
+        in_stream = self.create_stream_frag_ip6(self.pg0,
+                                                self.pg1.remote_ip4,
+                                                self.tcp_port_in,
+                                                20,
+                                                payload)
+        self.pg0.add_stream(in_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg1.get_capture(len(in_stream))
+        translated = self.reass_frags_and_verify(captured, self.nat_addr,
+                                                 self.pg1.remote_ip4)
+        self.assertEqual(translated[TCP].dport, 20)
+        self.assertNotEqual(translated[TCP].sport, self.tcp_port_in)
+        # the outside port chosen by NAT is the target of the reply below
+        self.tcp_port_out = translated[TCP].sport
+        self.assertEqual(payload, translated[Raw].load)
+
+        # out2in
+        payload = "A" * 4 + "b" * 16 + "C" * 3
+        out_stream = self.create_stream_frag(self.pg1, self.nat_addr, 20,
+                                             self.tcp_port_out, payload)
+        self.pg1.add_stream(out_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg0.get_capture(len(out_stream))
+        expected_src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96)
+        translated = self.reass_frags_and_verify_ip6(captured, expected_src,
+                                                     self.pg0.remote_ip6)
+        self.assertEqual(translated[TCP].sport, 20)
+        self.assertEqual(translated[TCP].dport, self.tcp_port_in)
+        self.assertEqual(payload, translated[Raw].load)
+
+        # one reassembly per direction is expected to have been added
+        reass_after = len(self.vapi.nat_reass_dump())
+        self.assertEqual(reass_after - reass_before, 2)
+
+    def test_reass_hairpinning(self):
+        """ NAT64 fragments hairpinning """
+        data = 'a' * 200
+        server = self.pg0.remote_hosts[1]
+        server_in_port = random.randint(1025, 65535)
+        server_out_port = random.randint(1025, 65535)
+        client_in_port = random.randint(1025, 65535)
+        # IPv6 form of the NAT pool address, as rendered by scapy
+        ip = IPv6(src=''.join(['64:ff9b::', self.nat_addr]))
+        nat_addr_ip6 = ip.src
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        # add static BIB entry for server
+        self.vapi.nat64_add_del_static_bib(server.ip6n,
+                                           self.nat_addr_n,
+                                           server_in_port,
+                                           server_out_port,
+                                           IP_PROTOS.tcp)
+
+        # send packet from host to server
+        pkts = self.create_stream_frag_ip6(self.pg0,
+                                           self.nat_addr,
+                                           client_in_port,
+                                           server_out_port,
+                                           data)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # hairpinned traffic leaves through the interface it arrived on
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify_ip6(frags, nat_addr_ip6, server.ip6)
+        self.assertNotEqual(p[TCP].sport, client_in_port)
+        self.assertEqual(p[TCP].dport, server_in_port)
+        self.assertEqual(data, p[Raw].load)
+
+    def test_frag_out_of_order(self):
+        """ NAT64 translate fragments arriving out of order """
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        # in2out
+        payload = 'a' * 200
+        in_stream = self.create_stream_frag_ip6(self.pg0,
+                                                self.pg1.remote_ip4,
+                                                self.tcp_port_in,
+                                                20,
+                                                payload)
+        # send the fragments in reversed order
+        in_stream.reverse()
+        self.pg0.add_stream(in_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg1.get_capture(len(in_stream))
+        translated = self.reass_frags_and_verify(captured, self.nat_addr,
+                                                 self.pg1.remote_ip4)
+        self.assertEqual(translated[TCP].dport, 20)
+        self.assertNotEqual(translated[TCP].sport, self.tcp_port_in)
+        # the outside port chosen by NAT is the target of the reply below
+        self.tcp_port_out = translated[TCP].sport
+        self.assertEqual(payload, translated[Raw].load)
+
+        # out2in
+        payload = "A" * 4 + "B" * 16 + "C" * 3
+        out_stream = self.create_stream_frag(self.pg1, self.nat_addr, 20,
+                                             self.tcp_port_out, payload)
+        # send the fragments in reversed order
+        out_stream.reverse()
+        self.pg1.add_stream(out_stream)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg0.get_capture(len(out_stream))
+        expected_src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96)
+        translated = self.reass_frags_and_verify_ip6(captured, expected_src,
+                                                     self.pg0.remote_ip6)
+        self.assertEqual(translated[TCP].sport, 20)
+        self.assertEqual(translated[TCP].dport, self.tcp_port_in)
+        self.assertEqual(payload, translated[Raw].load)
+
     def nat64_get_ses_num(self):
         """
         Return number of active NAT64 sessions.
     def nat64_get_ses_num(self):
         """
         Return number of active NAT64 sessions.
@@ -4006,6 +4415,7 @@ class TestNAT64(MethodHolder):
             self.logger.info(self.vapi.cli("show nat64 prefix"))
             self.logger.info(self.vapi.cli("show nat64 bib all"))
             self.logger.info(self.vapi.cli("show nat64 session table all"))
             self.logger.info(self.vapi.cli("show nat64 prefix"))
             self.logger.info(self.vapi.cli("show nat64 bib all"))
             self.logger.info(self.vapi.cli("show nat64 session table all"))
+            self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
             self.clear_nat64()
 
 
             self.clear_nat64()
 
 
index 31d7ac4..63f9383 100644 (file)
@@ -1408,6 +1408,43 @@ class VppPapiProvider(object):
              'vrf_id': vrf_id,
              'is_in': is_in})
 
              'vrf_id': vrf_id,
              'is_in': is_in})
 
+    def nat_set_reass(
+            self,
+            timeout=2,
+            max_reass=1024,
+            max_frag=5,
+            drop_frag=0,
+            is_ip6=0):
+        """Configure NAT virtual fragmentation reassembly
+
+        :param timeout: reassembly timeout (Default 2sec)
+        :param max_reass: maximum concurrent reassemblies (Default 1024)
+        :param max_frag: maximum fragments per reassembly (Default 5)
+        :param drop_frag: if 0 translate fragments, otherwise drop fragments
+        :param is_ip6: 1 if IPv6, 0 if IPv4
+        :returns: whatever the nat_set_reass API call returns
+        """
+        # message fields are named exactly like the keyword arguments
+        request = dict(timeout=timeout,
+                       max_reass=max_reass,
+                       max_frag=max_frag,
+                       drop_frag=drop_frag,
+                       is_ip6=is_ip6)
+        return self.api(self.papi.nat_set_reass, request)
+
+    def nat_get_reass(self):
+        """Read the NAT virtual fragmentation reassembly configuration
+
+        :return: NAT virtual fragmentation reassembly configuration
+        """
+        # the request carries no fields
+        no_args = {}
+        return self.api(self.papi.nat_get_reass, no_args)
+
+    def nat_reass_dump(self):
+        """List the current NAT virtual fragmentation reassemblies
+
+        :return: Dictionary of NAT virtual fragmentation reassemblies
+        """
+        # the request carries no fields
+        no_args = {}
+        return self.api(self.papi.nat_reass_dump, no_args)
+
     def nat_det_add_del_map(
             self,
             in_addr,
     def nat_det_add_del_map(
             self,
             in_addr,