SNAT: IP fragmentation (VPP-890) 56/8056/4
author Matus Fabian <matfabia@cisco.com>
Tue, 15 Aug 2017 13:59:19 +0000 (06:59 -0700)
committer Ole Trøan <otroan@employees.org>
Tue, 7 Nov 2017 21:58:31 +0000 (21:58 +0000)
Translation of fragmented packets.

Change-Id: I9b1f2e9433ce273638080f32c2d3bff39c49899d
Signed-off-by: Matus Fabian <matfabia@cisco.com>
15 files changed:
src/plugins/nat.am
src/plugins/nat/in2out.c
src/plugins/nat/nat.api
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat64_db.c
src/plugins/nat/nat64_db.h
src/plugins/nat/nat64_in2out.c
src/plugins/nat/nat64_out2in.c
src/plugins/nat/nat_api.c
src/plugins/nat/nat_reass.c [new file with mode: 0644]
src/plugins/nat/nat_reass.h [new file with mode: 0644]
src/plugins/nat/out2in.c
test/test_nat.py
test/vpp_papi_provider.py

index add82f0..b6c369f 100644 (file)
@@ -22,6 +22,7 @@ nat_plugin_la_SOURCES = nat/nat.c             \
        nat/nat_plugin.api.h                    \
         nat/nat_ipfix_logging.c                        \
         nat/nat_det.c                          \
+        nat/nat_reass.c                        \
         nat/nat64.c                            \
         nat/nat64_cli.c                        \
         nat/nat64_in2out.c                     \
index b059390..e4dbe91 100755 (executable)
@@ -24,6 +24,7 @@
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
+#include <nat/nat_reass.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -83,6 +84,25 @@ static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
   return s;
 }
 
+typedef struct { /* per-packet trace record of the nat44-in2out-reass node */
+  u32 sw_if_index; /* RX interface taken from the buffer */
+  u32 next_index;  /* next-node index chosen for the buffer */
+  u8 cached;       /* non-zero: fragment was cached, not translated */
+} nat44_in2out_reass_trace_t;
+
+/*
+ * Trace formatter for the nat44-in2out-reass node.
+ * The va_list carries (vlib_main_t *, vlib_node_t *, trace record); only
+ * the trace record is used.  Returns the extended format vector.
+ */
+static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
+{
+  nat44_in2out_reass_trace_t * trace;
+  const char * status;
+
+  /* vm and node precede the record; consume and ignore them */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, nat44_in2out_reass_trace_t *);
+
+  if (trace->cached)
+    status = "cached";
+  else
+    status = "translated";
+
+  return format (s,
+                 "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
+                 trace->sw_if_index, trace->next_index, status);
+}
+
 vlib_node_registration_t snat_in2out_node;
 vlib_node_registration_t snat_in2out_slowpath_node;
 vlib_node_registration_t snat_in2out_fast_node;
@@ -94,6 +114,7 @@ vlib_node_registration_t snat_in2out_output_worker_handoff_node;
 vlib_node_registration_t snat_hairpin_dst_node;
 vlib_node_registration_t snat_hairpin_src_node;
 vlib_node_registration_t nat44_hairpinning_node;
+vlib_node_registration_t nat44_in2out_reass_node;
 
 
 #define foreach_snat_in2out_error                       \
@@ -103,7 +124,10 @@ _(OUT_OF_PORTS, "Out of ports")                         \
 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
-_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "Drop fragment")                       \
+_(MAX_REASS, "Maximum reassemblies exceeded")           \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 typedef enum {
 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
@@ -123,6 +147,7 @@ typedef enum {
   SNAT_IN2OUT_NEXT_DROP,
   SNAT_IN2OUT_NEXT_ICMP_ERROR,
   SNAT_IN2OUT_NEXT_SLOW_PATH,
+  SNAT_IN2OUT_NEXT_REASS,
   SNAT_IN2OUT_N_NEXT,
 } snat_in2out_next_t;
 
@@ -243,6 +268,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   u32 address_index = ~0;
   u32 outside_fib_index;
   uword * p;
+  udp_header_t * udp0 = ip4_next_header (ip0);
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
@@ -443,6 +469,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   s->out2in.protocol = key0->protocol;
   s->out2in.fib_index = outside_fib_index;
   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+  s->ext_host_port = udp0->dst_port;
   *sessionp = s;
 
   /* Add to translation hashes */
@@ -1645,6 +1672,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace00;
                 }
+
+              if (ip4_is_fragment (ip0))
+                {
+                  next0 = SNAT_IN2OUT_NEXT_REASS;
+                  goto trace00;
+                }
             }
 
           key0.addr = ip0->src_address;
@@ -1819,6 +1852,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace01;
                 }
+
+              if (ip4_is_fragment (ip1))
+                {
+                  next1 = SNAT_IN2OUT_NEXT_REASS; /* packet 1 => next1, not next0 */
+                  goto trace01;
+                }
             }
 
           b1->flags |= VNET_BUFFER_F_IS_NATED;
@@ -2029,6 +2068,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                   next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
                   goto trace0;
                 }
+
+              if (ip4_is_fragment (ip0))
+                {
+                  next0 = SNAT_IN2OUT_NEXT_REASS;
+                  goto trace0;
+                }
             }
 
           key0.addr = ip0->src_address;
@@ -2194,6 +2239,7 @@ VLIB_REGISTER_NODE (snat_in2out_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
@@ -2227,6 +2273,7 @@ VLIB_REGISTER_NODE (snat_in2out_output_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
@@ -2261,6 +2308,7 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
@@ -2295,6 +2343,7 @@ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
@@ -2392,6 +2441,371 @@ VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
                               nat44_hairpinning_fn);
 
+/*
+ * Hairpinning for in2out fragments.
+ *
+ * If the packet's destination (address, dport) matches a static mapping or
+ * an active out2in session, the destination is a host behind this NAT:
+ * rewrite the destination to the inside address/port and set the TX FIB.
+ *
+ * sm     - NAT main structure
+ * b0     - buffer; sw_if_index[VLIB_TX] is set on a match
+ * ip0    - IPv4 header of the fragment
+ * sport  - source port (not used by this function)
+ * dport  - destination port of the flow, network byte order
+ * proto0 - SNAT protocol of the flow
+ *
+ * Only the first fragment carries an L4 header.  Lookups therefore use
+ * 'dport' rather than bytes read from the packet, and ports / L4 checksums
+ * are rewritten on the first fragment only; on later fragments the bytes
+ * after the IP header are payload and must not be modified.
+ */
+static inline void
+nat44_reass_hairpinning (snat_main_t *sm,
+                         vlib_buffer_t * b0,
+                         ip4_header_t * ip0,
+                         u16 sport,
+                         u16 dport,
+                         u32 proto0)
+{
+  snat_session_key_t key0, sm0;
+  snat_session_t * s0;
+  clib_bihash_kv_8_8_t kv0, value0;
+  ip_csum_t sum0;
+  u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
+  u16 new_dst_port0 = 0, old_dst_port0;
+  udp_header_t * udp0;
+  tcp_header_t * tcp0;
+
+  key0.addr = ip0->dst_address;
+  key0.port = dport;
+  key0.protocol = proto0;
+  key0.fib_index = sm->outside_fib_index;
+  kv0.key = key0.as_u64;
+
+  /* Check if destination is static mappings */
+  if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+    {
+      new_dst_addr0 = sm0.addr.as_u32;
+      new_dst_port0 = sm0.port;
+      vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+    }
+  /* or active sessions */
+  else
+    {
+      /* Derive the owning worker from the flow's port, never from the
+       * packet: non-first fragments have no L4 header to read. */
+      if (sm->num_workers > 1)
+        ti = (clib_net_to_host_u16 (dport) - 1024) / sm->port_per_thread;
+      else
+        ti = sm->num_workers;
+
+      if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
+        {
+          si = value0.value;
+          s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+          new_dst_addr0 = s0->in2out.addr.as_u32;
+          new_dst_port0 = s0->in2out.port;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+        }
+    }
+
+  /* No match: destination is not behind this NAT, nothing to rewrite */
+  if (!new_dst_addr0)
+    return;
+
+  /* Destination is behind the same NAT, use internal address and port */
+  old_dst_addr0 = ip0->dst_address.as_u32;
+  ip0->dst_address.as_u32 = new_dst_addr0;
+  sum0 = ip0->checksum;
+  sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                         ip4_header_t, dst_address);
+  ip0->checksum = ip_csum_fold (sum0);
+
+  /* L4 header exists in the first fragment only */
+  if (!ip4_is_first_fragment (ip0))
+    return;
+
+  old_dst_port0 = dport;
+  if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+    {
+      tcp0 = ip4_next_header (ip0);
+      sum0 = tcp0->checksum;
+      /* pseudo-header destination address changed */
+      sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                             ip4_header_t, dst_address);
+      if (new_dst_port0 != old_dst_port0)
+        {
+          tcp0->dst = new_dst_port0;
+          sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+                                 ip4_header_t /* cheat */, length);
+        }
+      tcp0->checksum = ip_csum_fold(sum0);
+    }
+  else if (new_dst_port0 != old_dst_port0)
+    {
+      udp0 = ip4_next_header (ip0);
+      udp0->dst_port = new_dst_port0;
+      udp0->checksum = 0;
+    }
+}
+
+static uword
+nat44_in2out_reass_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  snat_in2out_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t *sm = &snat_main;
+  f64 now = vlib_time_now (vm);
+  u32 thread_index = vlib_get_thread_index ();
+  snat_main_per_thread_data_t *per_thread_data =
+    &sm->per_thread_data[thread_index];
+  u32 *fragments_to_drop = 0; /* filled via nat_ip4_reass_find_or_create; dropped at the end */
+  u32 *fragments_to_loopback = 0; /* filled via nat_ip4_reass_get_frags; re-fed into 'from' below */
+  /* Translates in2out IPv4 fragments using the session recorded in the reassembly entry. */
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
+         vlib_buffer_t *b0;
+          u32 next0;
+          u8 cached0 = 0;
+          ip4_header_t *ip0;
+          nat_reass_ip4_t *reass0;
+          udp_header_t * udp0;
+          tcp_header_t * tcp0;
+          snat_session_key_t key0;
+          clib_bihash_kv_8_8_t kv0, value0;
+          snat_session_t * s0 = 0;
+          u16 old_port0, new_port0;
+          ip_csum_t sum0;
+
+          /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                               sw_if_index0);
+          /* nat_reass_is_drop_frag(0): presumably the IPv4 "drop fragments" setting - TODO confirm */
+          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
+            {
+              next0 = SNAT_IN2OUT_NEXT_DROP;
+              b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
+              goto trace0;
+            }
+
+          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
+          udp0 = ip4_next_header (ip0);
+          tcp0 = (tcp_header_t *) udp0;
+          proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
+                                                 ip0->dst_address,
+                                                 ip0->fragment_id,
+                                                 ip0->protocol,
+                                                 1,
+                                                 &fragments_to_drop);
+
+          if (PREDICT_FALSE (!reass0))
+            {
+              next0 = SNAT_IN2OUT_NEXT_DROP;
+              b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
+              goto trace0;
+            }
+          /* First fragment: its L4 source port keys the session lookup / creation. */
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              key0.addr = ip0->src_address;
+              key0.port = udp0->src_port;
+              key0.protocol = proto0;
+              key0.fib_index = rx_fib_index0;
+              kv0.key = key0.as_u64;
+
+              if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
+                {
+                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+                      ip0, proto0, rx_fib_index0, thread_index)))
+                    goto trace0;
+
+                  next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+                                     &s0, node, next0, thread_index);
+
+                  if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+                    goto trace0;
+
+                  reass0->sess_index = s0 - per_thread_data->sessions;
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (per_thread_data->sessions,
+                                          value0.value);
+                  reass0->sess_index = value0.value;
+                }
+              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
+            }
+          else
+            {
+              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
+                { /* session not recorded yet: hand the buffer to the reassembly entry */
+                  if (nat_ip4_reass_add_fragment (reass0, bi0))
+                    {
+                      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
+                      next0 = SNAT_IN2OUT_NEXT_DROP;
+                      goto trace0;
+                    }
+                  cached0 = 1;
+                  goto trace0;
+                }
+              s0 = pool_elt_at_index (per_thread_data->sessions,
+                                      reass0->sess_index);
+            }
+          /* Rewrite the source address and patch the IP header checksum. */
+          old_addr0 = ip0->src_address.as_u32;
+          ip0->src_address = s0->out2in.addr;
+          new_addr0 = ip0->src_address.as_u32;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                 ip4_header_t,
+                                 src_address /* changed member */);
+          ip0->checksum = ip_csum_fold (sum0);
+          /* Source port / L4 checksum are rewritten on the first fragment only. */
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            {
+              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+                {
+                  old_port0 = tcp0->src_port;
+                  tcp0->src_port = s0->out2in.port;
+                  new_port0 = tcp0->src_port;
+
+                  sum0 = tcp0->checksum;
+                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                         ip4_header_t,
+                                         dst_address /* changed member */);
+                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
+                                         ip4_header_t /* cheat */,
+                                         length /* changed member */);
+                  tcp0->checksum = ip_csum_fold(sum0);
+                }
+              else
+                {
+                  old_port0 = udp0->src_port;
+                  udp0->src_port = s0->out2in.port;
+                  udp0->checksum = 0;
+                }
+            }
+
+          /* Hairpinning */
+          nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
+                                   s0->ext_host_port, proto0);
+
+          /* Accounting */
+          s0->last_heard = now;
+          s0->total_pkts++;
+          s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+          /* Per-user LRU list maintenance for dynamic translation */
+          if (!snat_is_session_static (s0))
+            {
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+                                 s0->per_user_index);
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+                                  s0->per_user_list_head_index,
+                                  s0->per_user_index);
+            }
+
+        trace0:
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat44_in2out_reass_trace_t *t =
+                 vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->cached = cached0;
+              t->sw_if_index = sw_if_index0;
+              t->next_index = next0;
+            }
+          /* A cached buffer is not forwarded now: take back the speculative enqueue slot. */
+          if (cached0)
+            {
+              n_left_to_next++;
+              to_next--;
+            }
+          else
+            {
+              pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+              /* verify speculative enqueue, maybe switch current next frame */
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               bi0, next0);
+            }
+         /* Input exhausted: refill 'from' with looped-back fragments, at most VLIB_FRAME_SIZE. */
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
+                               sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
+                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+                               pkts_processed);
+
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                        &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
+                        SNAT_IN2OUT_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
+  .function = nat44_in2out_reass_node_fn,
+  .name = "nat44-in2out-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat44_in2out_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  /* Uses the snat_in2out error strings table. */
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+  /* Next nodes are indexed by snat_in2out_next_t. */
+  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+  .next_nodes = {
+    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
+    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
+                              nat44_in2out_reass_node_fn);
+
 /**************************/
 /*** deterministic mode ***/
 /**************************/
@@ -3771,6 +4185,7 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 
index 187de25..d8fdf72 100644 (file)
@@ -760,6 +760,87 @@ autoreply define nat_ipfix_enable_disable {
   u8 enable;
 };
 
+/** \brief Set NAT virtual fragmentation reassembly
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param timeout - reassembly timeout
+    @param max_reass - maximum number of concurrent reassemblies
+    @param max_frag - maximum number of fragments per reassembly
+    @param drop_frag - if 0 translate fragments, otherwise drop fragments
+    @param is_ip6 - 1 if IPv6, 0 if IPv4
+*/
+autoreply define nat_set_reass {
+  u32 client_index;
+  u32 context;
+  u32 timeout;
+  u16 max_reass;
+  u8  max_frag;
+  u8  drop_frag;
+  u8  is_ip6;
+};
+
+/** \brief Get NAT virtual fragmentation reassembly configuration
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_get_reass {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Get NAT virtual fragmentation reassembly configuration reply
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param ip4_timeout - reassembly timeout
+    @param ip4_max_reass - maximum number of concurrent reassemblies
+    @param ip4_max_frag - maximum number of fragments per reassembly
+    @param ip4_drop_frag - if 0 translate fragments, otherwise drop fragments
+    @param ip6_timeout - reassembly timeout
+    @param ip6_max_reass - maximum number of concurrent reassemblies
+    @param ip6_max_frag - maximum number of fragments per reassembly
+    @param ip6_drop_frag - if 0 translate fragments, otherwise drop fragments
+*/
+define nat_get_reass_reply {
+  u32 context;
+  i32 retval;
+  u32 ip4_timeout;
+  u16 ip4_max_reass;
+  u8  ip4_max_frag;
+  u8  ip4_drop_frag;
+  u32 ip6_timeout;
+  u16 ip6_max_reass;
+  u8  ip6_max_frag;
+  u8  ip6_drop_frag;
+};
+
+/** \brief Dump NAT virtual fragmentation reassemblies
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_reass_dump {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief NAT virtual fragmentation reassemblies response
+    @param context - sender context, to match reply w/ request
+    @param is_ip4 - 1 if address type is IPv4
+    @param src_addr - source IP address
+    @param dst_addr - destination IP address
+    @param frag_id - fragment ID
+    @param proto - protocol
+    @param frag_n - number of cached fragments
+*/
+define nat_reass_details {
+  u32 context;
+  u8 is_ip4;
+  u8 src_addr[16];
+  u8 dst_addr[16];
+  u32 frag_id;
+  u8 proto;
+  u8 frag_n;
+};
+
 /*
  * NAT44 APIs
  */
index cd5a6eb..7e651e5 100644 (file)
@@ -24,6 +24,7 @@
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
+#include <nat/nat_reass.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -1447,11 +1448,15 @@ static clib_error_t * snat_init (vlib_main_t * vm)
   /* Init IPFIX logging */
   snat_ipfix_logging_init(vm);
 
+  /* Init NAT64 */
   error = nat64_init(vm);
+  if (error)
+    return error;
 
   dslite_init(vm);
 
-  return error;
+  /* Init virtual fragmentation reassembly */
+  return nat_reass_init(vm);
 }
 
 VLIB_INIT_FUNCTION (snat_init);
@@ -2889,6 +2894,7 @@ show_snat_command_fn (vlib_main_t * vm,
             }
         }
     }
+
   return 0;
 }
 
index b72e075..5bd0a11 100644 (file)
@@ -154,9 +154,9 @@ typedef CLIB_PACKED(struct {
   /* Outside address */
   u32 outside_address_index;    /* 64-67 */
 
-  /* External host address */
+  /* External host address and port */
   ip4_address_t ext_host_addr;  /* 68-71 */
-
+  u16 ext_host_port;            /* 72-73 */
 }) snat_session_t;
 
 
@@ -563,4 +563,30 @@ maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index)
   return 0;
 }
 
-#endif /* __included_nat_h__ */
+/*
+ * Enqueue every buffer index of the vector 'bi_vector' to next node 'next'
+ * of 'node', setting each buffer's error to *error beforehand.
+ * An empty (or NULL) vector enqueues nothing.
+ */
+static_always_inline void
+nat_send_all_to_node(vlib_main_t *vm, u32 *bi_vector,
+                     vlib_node_runtime_t *node, vlib_error_t *error, u32 next)
+{
+  u32 *bi = bi_vector;
+  u32 remaining = vec_len(bi_vector);
+  u32 next_index = node->cached_next_index;
+  u32 *to_next, n_left_to_next;
+
+  while (remaining > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    while (remaining > 0 && n_left_to_next > 0) {
+      u32 bi0 = *bi++;
+      vlib_buffer_t *p0;
+
+      /* speculative enqueue, corrected by the validate macro below */
+      *to_next++ = bi0;
+      remaining--;
+      n_left_to_next--;
+
+      p0 = vlib_get_buffer(vm, bi0);
+      p0->error = *error;
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+                                      n_left_to_next, bi0, next);
+    }
+
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+}
+
+#endif /* __included_snat_h__ */
index da73cee..008a137 100644 (file)
@@ -529,6 +529,52 @@ nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr,
   return ste;
 }
 
+/*
+ * Return the index of 'ste' inside the session-table pool of 'db' that
+ * matches the entry's protocol, or ~0 when the protocol is not one of
+ * foreach_snat_protocol.
+ */
+u32
+nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste)
+{
+  nat64_db_st_entry_t *st;
+
+  switch (ip_proto_to_snat_proto (ste->proto))
+    {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+    case SNAT_PROTOCOL_##N: \
+      st = db->st._##n##_st; \
+      break;
+      foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+    default:
+      /* no index is reported for unknown-protocol entries */
+      return (u32) ~ 0;
+    }
+
+  return ste - st;
+}
+
+/*
+ * Look up a session-table entry by pool index in the table selected by
+ * the IP protocol 'proto' (unknown protocols use the unk_proto table).
+ * Returns 0 when 'ste_index' does not refer to an allocated entry, so
+ * callers can test the result for NULL.
+ */
+nat64_db_st_entry_t *
+nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index)
+{
+  nat64_db_st_entry_t *st;
+
+  switch (ip_proto_to_snat_proto (proto))
+    {
+/* *INDENT-OFF* */
+#define _(N, i, n, s) \
+    case SNAT_PROTOCOL_##N: \
+      st = db->st._##n##_st; \
+      break;
+      foreach_snat_protocol
+#undef _
+/* *INDENT-ON* */
+    default:
+      st = db->st._unk_proto_st;
+      break;
+    }
+
+  /* free or out-of-range slot (e.g. ~0 "no session"): report not found */
+  if (pool_is_free_index (st, ste_index))
+    return 0;
+
+  return pool_elt_at_index (st, ste_index);
+}
+
 void
 nad64_db_st_free_expired (nat64_db_t * db, u32 now)
 {
index 394ca87..94d9a8b 100644 (file)
@@ -296,6 +296,27 @@ void nad64_db_st_free_expired (nat64_db_t * db, u32 now);
  */
 void nat64_db_free_out_addr (nat64_db_t * db, ip4_address_t * out_addr);
 
+/**
+ * @brief Get ST entry index.
+ *
+ * @param db NAT64 DB.
+ * @param ste ST entry.
+ *
+ * @return ST entry index on success, ~0 otherwise.
+ */
+u32 nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste);
+
+/**
+ * @brief Get ST entry by index and protocol.
+ *
+ * @param db NAT64 DB.
+ * @param proto L4 protocol.
+ * @param ste_index ST entry index.
+ *
+ * @return ST entry if found.
+ */
+nat64_db_st_entry_t *nat64_db_st_entry_by_index (nat64_db_t * db,
+                                                u8 proto, u32 ste_index);
 #endif /* __included_nat64_db_h__ */
 
 /*
index f78baff..4f94575 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <nat/nat64.h>
+#include <nat/nat_reass.h>
 #include <vnet/ip/ip6_to_ip4.h>
 #include <vnet/fib/fib_table.h>
 
@@ -45,14 +46,42 @@ format_nat64_in2out_trace (u8 * s, va_list * args)
   return s;
 }
 
+typedef struct /* trace record printed by format_nat64_in2out_reass_trace */
+{
+  u32 sw_if_index; /* printed as "sw_if_index" */
+  u32 next_index;  /* printed as "next index" */
+  u8 cached;       /* non-zero prints "cached", zero prints "translated" */
+} nat64_in2out_reass_trace_t;
+
+/*
+ * Trace formatter for the NAT64 in2out reassembly trace record.
+ * The va_list carries (vlib_main_t *, vlib_node_t *, trace record); only
+ * the trace record is used.  Returns the extended format vector.
+ */
+static u8 *
+format_nat64_in2out_reass_trace (u8 * s, va_list * args)
+{
+  nat64_in2out_reass_trace_t *trace;
+  const char *status;
+
+  /* vm and node precede the record; consume and ignore them */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, nat64_in2out_reass_trace_t *);
+
+  if (trace->cached)
+    status = "cached";
+  else
+    status = "translated";
+
+  return format (s,
+                "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
+                trace->sw_if_index, trace->next_index, status);
+}
+
 vlib_node_registration_t nat64_in2out_node;
 vlib_node_registration_t nat64_in2out_slowpath_node;
+vlib_node_registration_t nat64_in2out_reass_node;
+
+#define foreach_nat64_in2out_error                       \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
+_(IN2OUT_PACKETS, "good in2out packets processed")       \
+_(NO_TRANSLATION, "no translation")                      \
+_(UNKNOWN, "unknown")                                    \
+_(DROP_FRAGMENT, "Drop fragment")                        \
+_(MAX_REASS, "Maximum reassemblies exceeded")            \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
-#define foreach_nat64_in2out_error                 \
-_(UNSUPPORTED_PROTOCOL, "unsupported protocol")    \
-_(IN2OUT_PACKETS, "good in2out packets processed") \
-_(NO_TRANSLATION, "no translation")                \
-_(UNKNOWN, "unknown")
 
 typedef enum
 {
@@ -74,6 +103,7 @@ typedef enum
   NAT64_IN2OUT_NEXT_IP6_LOOKUP,
   NAT64_IN2OUT_NEXT_DROP,
   NAT64_IN2OUT_NEXT_SLOWPATH,
+  NAT64_IN2OUT_NEXT_REASS,
   NAT64_IN2OUT_N_NEXT,
 } nat64_in2out_next_t;
 
@@ -936,13 +966,6 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
 
          proto0 = ip_proto_to_snat_proto (l4_protocol0);
-         if (frag_offset0 != 0)
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error =
-               node->errors[NAT64_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
-             goto trace0;
-           }
 
          if (is_slow_path)
            {
@@ -979,6 +1002,13 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                }
            }
 
+         if (PREDICT_FALSE
+             (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
+           {
+             next0 = NAT64_IN2OUT_NEXT_REASS;
+             goto trace0;
+           }
+
          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              if (is_hairpinning (&ip60->dst_address))
@@ -1073,6 +1103,7 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1102,6 +1133,7 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1109,6 +1141,455 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
 
+/*
+ * Context handed to the in2out fragment translation helpers
+ * (nat64_in2out_frag_set_cb and nat64_in2out_frag_hairpinning).
+ */
+typedef struct nat64_in2out_frag_set_ctx_t_
+{
+  /* vlib main, passed on to nat64_session_reset_timeout () */
+  vlib_main_t *vm;
+  /* session table entry index, resolved with nat64_db_st_entry_by_index () */
+  u32 sess_index;
+  /* byte offset from the start of the IPv6 header to the L4 header */
+  u16 l4_offset;
+  /* IP protocol number of the payload (compared with IP_PROTOCOL_TCP/UDP) */
+  u8 proto;
+  /* non-zero when the buffer holds the first fragment of the datagram */
+  u8 first_frag;
+} nat64_in2out_frag_set_ctx_t;
+
+/**
+ * @brief Rewrite callback used with ip6_to_ip4_fragmented ().
+ *
+ * Looks up the session (and its BIB entry) referenced by the context,
+ * refreshes the session timeout and writes the translated IPv4 source and
+ * destination addresses. On the first fragment, which carries the L4
+ * header, the source port is rewritten too and the TCP/UDP checksum is
+ * incrementally adjusted for the new pseudo header and port.
+ *
+ * @param ip6 IPv6 header of the fragment being translated.
+ * @param ip4 IPv4 header being built.
+ * @param arg nat64_in2out_frag_set_ctx_t describing the fragment.
+ *
+ * @returns 0 on success, -1 when the session or BIB entry no longer exists.
+ */
+static int
+nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
+{
+  nat64_main_t *nm = &nat64_main;
+  nat64_in2out_frag_set_ctx_t *ctx = arg;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  if (ctx->first_frag)
+    {
+      udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
+      tcp_header_t *tcp = (tcp_header_t *) udp;
+      u16 *checksum = 0;
+
+      /*
+       * Both TCP and UDP checksums cover the pseudo header and the source
+       * port, so both have to follow the rewrite. Updating TCP only left
+       * translated UDP datagrams with a stale checksum.
+       */
+      if (ctx->proto == IP_PROTOCOL_TCP)
+       checksum = &tcp->checksum;
+      else if (ctx->proto == IP_PROTOCOL_UDP)
+       checksum = &udp->checksum;
+
+      if (checksum)
+       {
+         ip_csum_t csum;
+
+         /* take out the old port and the IPv6 pseudo header addresses ... */
+         csum = ip_csum_sub_even (*checksum, udp->src_port);
+         csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
+         csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+         csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+         csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+         /* ... and put in the translated port and IPv4 addresses */
+         csum = ip_csum_add_even (csum, bibe->out_port);
+         csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
+         csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
+         *checksum = ip_csum_fold (csum);
+       }
+
+      /* source port sits at the same offset in the TCP and UDP headers */
+      udp->src_port = bibe->out_port;
+    }
+
+  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
+  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
+
+  return 0;
+}
+
+/**
+ * @brief Hairpin an IPv6 fragment between two hosts behind the NAT64.
+ *
+ * The packet stays IPv6. The source address becomes the IPv6 form of the
+ * BIB outside address of the sender's session; the destination address
+ * becomes the inside address of the BIB entry that matches the sender's
+ * remote address/port. A session towards that host is created when only
+ * the BIB entry exists. On the first fragment the L4 ports and the TCP/UDP
+ * checksum are rewritten as well.
+ *
+ * @param b   Buffer holding the fragment (not used by this function).
+ * @param ip6 IPv6 header of the fragment, modified in place.
+ * @param ctx Fragment context (session index, protocol, L4 offset).
+ *
+ * @returns 0 on success, -1 when a session/BIB entry is missing or the
+ *          session towards the destination cannot be created.
+ */
+static int
+nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
+                              nat64_in2out_frag_set_ctx_t * ctx)
+{
+  nat64_main_t *nm = &nat64_main;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  u16 sport, dport;
+  u16 *checksum = 0;
+  ip_csum_t csum = 0;
+  ip46_address_t saddr, daddr;
+
+  /* only the first fragment carries an L4 header to read and fix up */
+  if (ctx->first_frag)
+    {
+      if (ctx->proto == IP_PROTOCOL_UDP)
+       checksum = &udp->checksum;
+      else
+       checksum = &tcp->checksum;
+
+      /* remove the original addresses and ports from the checksum */
+      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, udp->src_port);
+      csum = ip_csum_sub_even (csum, udp->dst_port);
+    }
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  sport = bibe->out_port;
+  dport = ste->r_port;
+
+  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
+
+  memset (&saddr, 0, sizeof (saddr));
+  memset (&daddr, 0, sizeof (daddr));
+  saddr.ip4.as_u32 = bibe->out_addr.as_u32;
+  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
+
+  /* find the session/BIB entry of the host the packet is hairpinned to */
+  ste =
+    nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, ctx->proto,
+                           0, 0);
+
+  if (ste)
+    {
+      bibe =
+       nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+      if (!bibe)
+       return -1;
+    }
+  else
+    {
+      bibe =
+       nat64_db_bib_entry_find (&nm->db, &daddr, dport, ctx->proto, 0, 0);
+
+      if (!bibe)
+       return -1;
+
+      ste =
+       nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
+                                 &saddr.ip4, sport);
+      /* do not forward when the session could not be created */
+      if (!ste)
+       return -1;
+    }
+
+  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+  if (ctx->first_frag)
+    {
+      udp->dst_port = bibe->in_port;
+      udp->src_port = sport;
+      /* add the rewritten addresses and ports back into the checksum */
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, udp->src_port);
+      csum = ip_csum_add_even (csum, udp->dst_port);
+      *checksum = ip_csum_fold (csum);
+    }
+
+  return 0;
+}
+
+/*
+ * Node function of "nat64-in2out-reass": translates IPv6 fragments
+ * (TCP and UDP only) without reassembling them.
+ *
+ * The first fragment (fragment offset 0) finds or creates the BIB and
+ * session entries and records the session index in the reassembly entry.
+ * A non-first fragment that arrives while the session index is still
+ * unknown is stored with nat_ip6_reass_add_fragment () and not forwarded
+ * ("cached"); when the first fragment shows up the stored buffers are
+ * fetched with nat_ip6_reass_get_frags () and run through this loop again.
+ *
+ * Returns frame->n_vectors.
+ */
+static uword
+nat64_in2out_reass_node_fn (vlib_main_t * vm,
+                           vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  nat64_in2out_next_t next_index;
+  u32 pkts_processed = 0;
+  u32 *fragments_to_drop = 0;
+  u32 *fragments_to_loopback = 0;
+  nat64_main_t *nm = &nat64_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         u8 cached0 = 0;
+         ip6_header_t *ip60;
+         u16 l4_offset0, frag_offset0;
+         u8 l4_protocol0;
+         nat_reass_ip6_t *reass0;
+         ip6_frag_hdr_t *frag0;
+         nat64_db_bib_entry_t *bibe0;
+         nat64_db_st_entry_t *ste0;
+         udp_header_t *udp0;
+         snat_protocol_t proto0;
+         u32 sw_if_index0, fib_index0;
+         ip46_address_t saddr0, daddr0;
+         nat64_in2out_frag_set_ctx_t ctx0;
+
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
+
+         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         fib_index0 =
+           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
+                                                sw_if_index0);
+
+         /* drop every fragment when the drop-fragment setting is on */
+         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
+
+         if (PREDICT_FALSE
+             (ip6_parse
+              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+               &frag_offset0)))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
+             goto trace0;
+           }
+
+         /* only TCP and UDP fragments are translated here */
+         if (PREDICT_FALSE
+             (!(l4_protocol0 == IP_PROTOCOL_TCP
+                || l4_protocol0 == IP_PROTOCOL_UDP)))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
+         frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
+         proto0 = ip_proto_to_snat_proto (l4_protocol0);
+
+         reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
+                                                ip60->dst_address,
+                                                frag0->identification,
+                                                l4_protocol0,
+                                                1, &fragments_to_drop);
+
+         if (PREDICT_FALSE (!reass0))
+           {
+             next0 = NAT64_IN2OUT_NEXT_DROP;
+             b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
+             goto trace0;
+           }
+
+         /* non-zero fragment offset: this is not the first fragment */
+         if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
+           {
+             ctx0.first_frag = 0;
+             /* session not known yet: keep the buffer in the reassembly */
+             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
+               {
+                 if (nat_ip6_reass_add_fragment (reass0, bi0))
+                   {
+                     b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
+                     next0 = NAT64_IN2OUT_NEXT_DROP;
+                     goto trace0;
+                   }
+                 cached0 = 1;
+                 goto trace0;
+               }
+           }
+         else
+           {
+             /* first fragment: it carries the L4 ports needed for lookup */
+             ctx0.first_frag = 1;
+
+             saddr0.as_u64[0] = ip60->src_address.as_u64[0];
+             saddr0.as_u64[1] = ip60->src_address.as_u64[1];
+             daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
+             daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
+
+             ste0 =
+               nat64_db_st_entry_find (&nm->db, &saddr0, &daddr0,
+                                       udp0->src_port, udp0->dst_port,
+                                       l4_protocol0, fib_index0, 1);
+             if (!ste0)
+               {
+                 bibe0 =
+                   nat64_db_bib_entry_find (&nm->db, &saddr0, udp0->src_port,
+                                            l4_protocol0, fib_index0, 1);
+                 if (!bibe0)
+                   {
+                     /* no binding yet: allocate an outside address/port */
+                     u16 out_port0;
+                     ip4_address_t out_addr0;
+                     if (nat64_alloc_out_addr_and_port
+                         (fib_index0, proto0, &out_addr0, &out_port0))
+                       {
+                         next0 = NAT64_IN2OUT_NEXT_DROP;
+                         b0->error =
+                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                         goto trace0;
+                       }
+
+                     bibe0 =
+                       nat64_db_bib_entry_create (&nm->db,
+                                                  &ip60->src_address,
+                                                  &out_addr0, udp0->src_port,
+                                                  clib_host_to_net_u16
+                                                  (out_port0), fib_index0,
+                                                  l4_protocol0, 0);
+                     if (!bibe0)
+                       {
+                         next0 = NAT64_IN2OUT_NEXT_DROP;
+                         b0->error =
+                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                         goto trace0;
+                       }
+                   }
+                 nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
+                                    fib_index0);
+                 ste0 =
+                   nat64_db_st_entry_create (&nm->db, bibe0,
+                                             &ip60->dst_address, &daddr0.ip4,
+                                             udp0->dst_port);
+                 if (!ste0)
+                   {
+                     next0 = NAT64_IN2OUT_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+               }
+             reass0->sess_index =
+               nat64_db_st_entry_get_index (&nm->db, ste0);
+
+             /* collect fragments stored earlier so they get translated too */
+             nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
+           }
+
+         ctx0.sess_index = reass0->sess_index;
+         ctx0.proto = l4_protocol0;
+         ctx0.vm = vm;
+         ctx0.l4_offset = l4_offset0;
+
+         if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
+           {
+             /* hairpinned packets stay IPv6 and go to the IPv6 lookup */
+             next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
+             if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
+               {
+                 next0 = NAT64_IN2OUT_NEXT_DROP;
+                 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
+               }
+             goto trace0;
+           }
+         else
+           {
+             if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
+               {
+                 next0 = NAT64_IN2OUT_NEXT_DROP;
+                 b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
+                 goto trace0;
+               }
+           }
+
+       trace0:
+         if (PREDICT_FALSE
+             ((node->flags & VLIB_NODE_FLAG_TRACE)
+              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             nat64_in2out_reass_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->cached = cached0;
+             t->sw_if_index = sw_if_index0;
+             t->next_index = next0;
+           }
+
+         if (cached0)
+           {
+             /* buffer was stored, not forwarded: undo speculative enqueue */
+             n_left_to_next++;
+             to_next--;
+           }
+         else
+           {
+             pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
+
+             /* verify speculative enqueue, maybe switch current next frame */
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                              to_next, n_left_to_next,
+                                              bi0, next0);
+           }
+
+         /*
+          * Input exhausted but released fragments are pending: reuse the
+          * frame's vector as input for them, at most VLIB_FRAME_SIZE per
+          * pass; the rest stays in fragments_to_loopback for the next pass.
+          */
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback,
+                              sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                              fragments_to_loopback + (len -
+                                                       VLIB_FRAME_SIZE),
+                              sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
+                              NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
+                              pkts_processed);
+
+  /* buffers that nat_ip6_reass_find_or_create () put in fragments_to_drop */
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                       &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
+                       NAT64_IN2OUT_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "nat64-in2out-reass" graph node. It uses the
+ * nat64_in2out error strings and the NAT64_IN2OUT_NEXT_* dispositions,
+ * which include a next arc back to this node itself.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
+  .function = nat64_in2out_reass_node_fn,
+  .name = "nat64-in2out-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat64_in2out_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
+  .error_strings = nat64_in2out_error_strings,
+  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
+    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
+    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
+                             nat64_in2out_reass_node_fn);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 61e88a7..eb5ecb4 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <nat/nat64.h>
+#include <nat/nat_reass.h>
 #include <vnet/ip/ip4_to_ip6.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -41,13 +42,41 @@ format_nat64_out2in_trace (u8 * s, va_list * args)
   return s;
 }
 
+/* Per-packet trace record written by the nat64-out2in-reass node. */
+typedef struct
+{
+  /* RX software interface index of the traced buffer */
+  u32 sw_if_index;
+  /* next index chosen for the buffer */
+  u32 next_index;
+  /* non-zero when the fragment was stored instead of being translated */
+  u8 cached;
+} nat64_out2in_reass_trace_t;
+
+/*
+ * Trace formatter of the nat64-out2in-reass node: prints the interface
+ * index, the next index and whether the fragment was cached or translated.
+ */
+static u8 *
+format_nat64_out2in_reass_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat64_out2in_reass_trace_t *trace =
+    va_arg (*args, nat64_out2in_reass_trace_t *);
+  const char *status = "translated";
+
+  if (trace->cached)
+    status = "cached";
+
+  return format (s,
+                "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
+                trace->sw_if_index, trace->next_index, status);
+}
+
+/* Node registrations; the definitions use VLIB_REGISTER_NODE further down. */
 vlib_node_registration_t nat64_out2in_node;
+vlib_node_registration_t nat64_out2in_reass_node;
+
+/*
+ * List of (symbol, description) pairs for the out2in error counters.
+ * NOTE(review): presumably expanded below into the NAT64_OUT2IN_ERROR_*
+ * enum and the nat64_out2in_error_strings table - confirm.
+ */
+#define foreach_nat64_out2in_error                       \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")          \
+_(OUT2IN_PACKETS, "Good out2in packets processed")       \
+_(NO_TRANSLATION, "No translation")                      \
+_(UNKNOWN, "unknown")                                    \
+_(DROP_FRAGMENT, "Drop fragment")                        \
+_(MAX_REASS, "Maximum reassemblies exceeded")            \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
-#define foreach_nat64_out2in_error                 \
-_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")    \
-_(OUT2IN_PACKETS, "Good out2in packets processed") \
-_(NO_TRANSLATION, "No translation")                \
-_(UNKNOWN, "unknown")
 
 typedef enum
 {
@@ -67,6 +96,7 @@ typedef enum
 {
   NAT64_OUT2IN_NEXT_LOOKUP,
   NAT64_OUT2IN_NEXT_DROP,
+  NAT64_OUT2IN_NEXT_REASS,
   NAT64_OUT2IN_N_NEXT,
 } nat64_out2in_next_t;
 
@@ -412,20 +442,27 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          proto0 = ip_proto_to_snat_proto (ip40->protocol);
 
-         if (proto0 == SNAT_PROTOCOL_ICMP)
+         if (PREDICT_FALSE (proto0 == ~0))
            {
-             if (icmp_to_icmp6
-                 (b0, nat64_out2in_icmp_set_cb, &ctx0,
-                  nat64_out2in_inner_icmp_set_cb, &ctx0))
+             if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
-                 goto trace0;
                }
+             goto trace0;
            }
-         else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+
+         if (PREDICT_FALSE (ip4_is_fragment (ip40)))
            {
-             if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
+             next0 = NAT64_OUT2IN_NEXT_REASS;
+             goto trace0;
+           }
+
+         if (proto0 == SNAT_PROTOCOL_ICMP)
+           {
+             if (icmp_to_icmp6
+                 (b0, nat64_out2in_icmp_set_cb, &ctx0,
+                  nat64_out2in_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
@@ -434,7 +471,7 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
          else
            {
-             if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
+             if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
@@ -474,17 +511,361 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = {
   .format_trace = format_nat64_out2in_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
-  .error_strings = nat64_out2in_error_strings,.n_next_nodes = 2,
+  .error_strings = nat64_out2in_error_strings,
+  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
   /* edit / add dispositions here */
   .next_nodes = {
     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
     [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
+    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
   },
 };
 /* *INDENT-ON* */
 
 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
 
+/* Context handed to nat64_out2in_frag_set_cb for one fragment. */
+typedef struct nat64_out2in_frag_set_ctx_t_
+{
+  /* vlib main, passed on to nat64_session_reset_timeout () */
+  vlib_main_t *vm;
+  /* buffer being translated; its sw_if_index[VLIB_TX] is set by the cb */
+  vlib_buffer_t *b;
+  /* session table entry index, resolved with nat64_db_st_entry_by_index () */
+  u32 sess_index;
+  /* IP protocol number taken from the IPv4 header */
+  u8 proto;
+  /* non-zero when the buffer holds the first fragment of the datagram */
+  u8 first_frag;
+} nat64_out2in_frag_set_ctx_t;
+
+/**
+ * @brief Rewrite callback used with ip4_to_ip6_fragmented ().
+ *
+ * Looks up the session (and its BIB entry) referenced by the context,
+ * refreshes the session timeout, writes the IPv6 source and destination
+ * addresses and sets the TX FIB index of the buffer. On the first
+ * fragment the destination port is rewritten and the TCP/UDP checksum is
+ * incrementally adjusted from the IPv4 to the IPv6 pseudo header.
+ *
+ * @param ip4 IPv4 header of the fragment being translated.
+ * @param ip6 IPv6 header being built.
+ * @param arg nat64_out2in_frag_set_ctx_t describing the fragment.
+ *
+ * @returns 0 on success, -1 when the session or BIB entry no longer exists
+ *          or when the first fragment is UDP without a checksum.
+ */
+static int
+nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+{
+  nat64_main_t *nm = &nat64_main;
+  nat64_out2in_frag_set_ctx_t *ctx = arg;
+  nat64_db_st_entry_t *ste;
+  nat64_db_bib_entry_t *bibe;
+  udp_header_t *udp = ip4_next_header (ip4);
+  tcp_header_t *tcp = ip4_next_header (ip4);
+  ip_csum_t csum;
+  u16 *checksum;
+
+  ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
+  if (!ste)
+    return -1;
+
+  bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
+  if (!bibe)
+    return -1;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  if (ctx->first_frag)
+    {
+      if (ip4->protocol == IP_PROTOCOL_UDP)
+       {
+         checksum = &udp->checksum;
+
+         /*
+          * A zero UDP checksum means "no checksum" in IPv4, but IPv6
+          * requires one. It cannot be computed here because only one
+          * fragment of the datagram is available, and an incremental
+          * update of zero would produce a bogus value, so the fragment
+          * is dropped. (The previous test checked the pointer instead of
+          * the checksum value and therefore never fired.)
+          */
+         if (!*checksum)
+           return -1;
+       }
+      else
+       checksum = &tcp->checksum;
+
+      /* destination port sits at the same offset in TCP and UDP headers */
+      udp->dst_port = bibe->in_port;
+
+      /* replace IPv4 addresses and outside port by their IPv6-side values */
+      csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
+      csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
+      csum = ip_csum_sub_even (csum, bibe->out_port);
+      csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
+      csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
+      csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
+      csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
+      csum = ip_csum_add_even (csum, bibe->in_port);
+      *checksum = ip_csum_fold (csum);
+    }
+
+  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
+  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
+
+  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
+  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
+
+  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+  return 0;
+}
+
+/*
+ * Node function of "nat64-out2in-reass": translates IPv4 fragments
+ * (TCP and UDP only) to IPv6 without reassembling them.
+ *
+ * The first fragment finds the session, or creates one from an existing
+ * BIB entry, and records the session index in the reassembly entry.
+ * A non-first fragment that arrives while the session index is still
+ * unknown is stored with nat_ip4_reass_add_fragment () and not forwarded
+ * ("cached"); when the first fragment shows up the stored buffers are
+ * fetched with nat_ip4_reass_get_frags () and run through this loop again.
+ *
+ * Returns frame->n_vectors.
+ */
+static uword
+nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  nat64_out2in_next_t next_index;
+  u32 pkts_processed = 0;
+  u32 *fragments_to_drop = 0;
+  u32 *fragments_to_loopback = 0;
+  nat64_main_t *nm = &nat64_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         ip4_header_t *ip40;
+         u8 cached0 = 0;
+         u32 sw_if_index0, fib_index0;
+         udp_header_t *udp0;
+         nat_reass_ip4_t *reass0;
+         ip46_address_t saddr0, daddr0;
+         nat64_db_st_entry_t *ste0;
+         nat64_db_bib_entry_t *bibe0;
+         ip6_address_t ip6_saddr0;
+         nat64_out2in_frag_set_ctx_t ctx0;
+
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         next0 = NAT64_OUT2IN_NEXT_LOOKUP;
+
+         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         fib_index0 =
+           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                sw_if_index0);
+
+         /*
+          * Drop every fragment when the drop-fragment setting is on.
+          * NOTE(review): the setting is queried with argument 1 although
+          * the packets here are IPv4 - confirm this is intended.
+          */
+         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         ip40 = vlib_buffer_get_current (b0);
+
+         /* only TCP and UDP fragments are translated here */
+         if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
+                              || ip40->protocol == IP_PROTOCOL_UDP)))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
+             goto trace0;
+           }
+
+         udp0 = ip4_next_header (ip40);
+
+         reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
+                                                ip40->dst_address,
+                                                ip40->fragment_id,
+                                                ip40->protocol,
+                                                1, &fragments_to_drop);
+
+         if (PREDICT_FALSE (!reass0))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
+             goto trace0;
+           }
+
+         if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
+           {
+             /* first fragment: it carries the L4 ports needed for lookup */
+             ctx0.first_frag = 1;
+
+             memset (&saddr0, 0, sizeof (saddr0));
+             saddr0.ip4.as_u32 = ip40->src_address.as_u32;
+             memset (&daddr0, 0, sizeof (daddr0));
+             daddr0.ip4.as_u32 = ip40->dst_address.as_u32;
+
+             ste0 =
+               nat64_db_st_entry_find (&nm->db, &daddr0, &saddr0,
+                                       udp0->dst_port, udp0->src_port,
+                                       ip40->protocol, fib_index0, 0);
+             if (!ste0)
+               {
+                 /* no session: one is created only if a BIB entry exists */
+                 bibe0 =
+                   nat64_db_bib_entry_find (&nm->db, &daddr0, udp0->dst_port,
+                                            ip40->protocol, fib_index0, 0);
+                 if (!bibe0)
+                   {
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+
+                 nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
+                                    bibe0->fib_index);
+                 ste0 =
+                   nat64_db_st_entry_create (&nm->db, bibe0, &ip6_saddr0,
+                                             &saddr0.ip4, udp0->src_port);
+
+                 if (!ste0)
+                   {
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     b0->error =
+                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
+                     goto trace0;
+                   }
+               }
+             reass0->sess_index =
+               nat64_db_st_entry_get_index (&nm->db, ste0);
+
+             /* collect fragments stored earlier so they get translated too */
+             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
+           }
+         else
+           {
+             ctx0.first_frag = 0;
+
+             /* session not known yet: keep the buffer in the reassembly */
+             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
+               {
+                 if (nat_ip4_reass_add_fragment (reass0, bi0))
+                   {
+                     b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
+                     next0 = NAT64_OUT2IN_NEXT_DROP;
+                     goto trace0;
+                   }
+                 cached0 = 1;
+                 goto trace0;
+               }
+           }
+
+         ctx0.sess_index = reass0->sess_index;
+         ctx0.proto = ip40->protocol;
+         ctx0.vm = vm;
+         ctx0.b = b0;
+
+         if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
+           {
+             next0 = NAT64_OUT2IN_NEXT_DROP;
+             b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
+             goto trace0;
+           }
+
+       trace0:
+         if (PREDICT_FALSE
+             ((node->flags & VLIB_NODE_FLAG_TRACE)
+              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             nat64_out2in_reass_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->cached = cached0;
+             t->sw_if_index = sw_if_index0;
+             t->next_index = next0;
+           }
+
+         if (cached0)
+           {
+             /* buffer was stored, not forwarded: undo speculative enqueue */
+             n_left_to_next++;
+             to_next--;
+           }
+         else
+           {
+             pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
+
+             /* verify speculative enqueue, maybe switch current next frame */
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                              to_next, n_left_to_next,
+                                              bi0, next0);
+           }
+
+         /*
+          * Input exhausted but released fragments are pending: reuse the
+          * frame's vector as input for them, at most VLIB_FRAME_SIZE per
+          * pass; the rest stays in fragments_to_loopback for the next pass.
+          */
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           {
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback,
+                              sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               {
+                 clib_memcpy (from,
+                              fragments_to_loopback + (len -
+                                                       VLIB_FRAME_SIZE),
+                              sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
+                              NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
+                              pkts_processed);
+
+  /* buffers that nat_ip4_reass_find_or_create () put in fragments_to_drop */
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                       &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
+                       NAT64_OUT2IN_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "nat64-out2in-reass" graph node. It uses the
+ * nat64_out2in error strings and the NAT64_OUT2IN_NEXT_* dispositions,
+ * which include a next arc back to this node itself.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
+  .function = nat64_out2in_reass_node_fn,
+  .name = "nat64-out2in-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat64_out2in_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
+  .error_strings = nat64_out2in_error_strings,
+  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
+    [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup",
+    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
+                             nat64_out2in_reass_node_fn);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 0ffa2f0..548a9e0 100644 (file)
@@ -22,6 +22,7 @@
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
 #include <nat/dslite.h>
+#include <nat/nat_reass.h>
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
 
@@ -1496,6 +1497,146 @@ vl_api_nat_ipfix_enable_disable_t_print (vl_api_nat_ipfix_enable_disable_t *
   FINISH;
 }
 
+/**
+ * Handler for the nat_set_reass API message.
+ *
+ * Decodes timeout (ntohl) and max_reass (ntohs), passes max_frag, drop_frag
+ * and is_ip6 through unchanged, and stores the nat_reass_set() result in rv
+ * before invoking REPLY_MACRO.
+ */
+static void
+vl_api_nat_set_reass_t_handler (vl_api_nat_set_reass_t * mp)
+{
+  /* sm and rmp are not referenced directly below; presumably REPLY_MACRO
+   * picks them up by name - confirm before renaming. */
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_set_reass_reply_t *rmp;
+  int rv = 0;
+
+  rv =
+    nat_reass_set (ntohl (mp->timeout), ntohs (mp->max_reass), mp->max_frag,
+                  mp->drop_frag, mp->is_ip6);
+
+  REPLY_MACRO (VL_API_NAT_SET_REASS_REPLY);
+}
+
+/**
+ * Render a nat_set_reass request as a "SCRIPT:" text line.
+ *
+ * @param mp     Request message as received (multi-byte fields still in
+ *               network byte order).
+ * @param handle Opaque handle; not used directly here (presumably consumed
+ *               by FINISH - confirm).
+ */
+static void *
+vl_api_nat_set_reass_t_print (vl_api_nat_set_reass_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_set_reass ");
+  /* The handler decodes timeout/max_reass with ntohl/ntohs, so the fields
+   * are network order here: convert net -> host (not host -> net). */
+  s = format (s, "timeout %d max_reass %d max_frag %d drop_frag %d is_ip6 %d",
+             clib_net_to_host_u32 (mp->timeout),
+             clib_net_to_host_u16 (mp->max_reass),
+             mp->max_frag, mp->drop_frag, mp->is_ip6);
+
+  FINISH;
+}
+
+/**
+ * Handler for the nat_get_reass API message.
+ *
+ * Fills the reply with the current IPv4 (is_ip6 = 0) and IPv6 (is_ip6 = 1)
+ * reassembly settings; the 32-bit timeout and 16-bit max_reass values are
+ * converted with htonl/htons, the single-byte fields are copied as-is.
+ */
+static void
+vl_api_nat_get_reass_t_handler (vl_api_nat_get_reass_t * mp)
+{
+  /* sm, rmp and rv are presumably consumed by REPLY_MACRO2 - confirm. */
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_get_reass_reply_t *rmp;
+  int rv = 0;
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_NAT_GET_REASS_REPLY,
+  ({
+    rmp->ip4_timeout = htonl (nat_reass_get_timeout(0));
+    rmp->ip4_max_reass = htons (nat_reass_get_max_reass(0));
+    rmp->ip4_max_frag = nat_reass_get_max_frag(0);
+    rmp->ip4_drop_frag = nat_reass_is_drop_frag(0);
+    rmp->ip6_timeout = htonl (nat_reass_get_timeout(1));
+    rmp->ip6_max_reass = htons (nat_reass_get_max_reass(1));
+    rmp->ip6_max_frag = nat_reass_get_max_frag(1);
+    rmp->ip6_drop_frag = nat_reass_is_drop_frag(1);
+  }))
+  /* *INDENT-ON* */
+}
+
+/**
+ * Render a nat_get_reass request as a "SCRIPT:" text line; the request
+ * fields are not printed.
+ */
+static void *
+vl_api_nat_get_reass_t_print (vl_api_nat_get_reass_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_get_reass");
+
+  FINISH;
+}
+
+/** Context handed to the reassembly walk callbacks of the dump handler. */
+typedef struct nat_api_walk_ctx_t_
+{
+  /* client input queue the details messages are sent to */
+  unix_shared_memory_queue_t *q;
+  /* context value copied from the dump request into every details message */
+  u32 context;
+} nat_api_walk_ctx_t;
+
+/**
+ * Walk callback: send one nat_reass_details message for an IPv4 reassembly.
+ *
+ * @param reass Reassembly entry being visited.
+ * @param arg   Pointer to a nat_api_walk_ctx_t (queue + request context).
+ *
+ * @returns always 0.
+ */
+static int
+nat_ip4_reass_walk_api (nat_reass_ip4_t * reass, void *arg)
+{
+  vl_api_nat_reass_details_t *rmp;
+  snat_main_t *sm = &snat_main;
+  nat_api_walk_ctx_t *ctx = arg;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  memset (rmp, 0, sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
+  rmp->context = ctx->context;
+  /* only the first 4 bytes of the address fields are filled for IPv4 */
+  clib_memcpy (rmp->src_addr, &(reass->key.src), 4);
+  clib_memcpy (rmp->dst_addr, &(reass->key.dst), 4);
+  rmp->proto = reass->key.proto;
+  /* NOTE(review): the CLI prints key.frag_id via clib_net_to_host_u16, so the
+   * key apparently holds a network-order 16-bit id; a 32-bit swap here may
+   * not yield the intended wire value - confirm against the API consumer. */
+  rmp->frag_id = ntohl (reass->key.frag_id);
+  rmp->frag_n = reass->frag_n;
+  rmp->is_ip4 = 1;
+
+  /* the address of the message pointer is what gets passed to the queue */
+  vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+  return 0;
+}
+
+/**
+ * Walk callback: send one nat_reass_details message for an IPv6 reassembly.
+ *
+ * @param reass Reassembly entry being visited.
+ * @param arg   Pointer to a nat_api_walk_ctx_t (queue + request context).
+ *
+ * @returns always 0.
+ */
+static int
+nat_ip6_reass_walk_api (nat_reass_ip6_t * reass, void *arg)
+{
+  vl_api_nat_reass_details_t *rmp;
+  snat_main_t *sm = &snat_main;
+  nat_api_walk_ctx_t *ctx = arg;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  memset (rmp, 0, sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
+  rmp->context = ctx->context;
+  /* full 16-byte IPv6 addresses */
+  clib_memcpy (rmp->src_addr, &(reass->key.src), 16);
+  clib_memcpy (rmp->dst_addr, &(reass->key.dst), 16);
+  rmp->proto = reass->key.proto;
+  rmp->frag_id = ntohl (reass->key.frag_id);
+  rmp->frag_n = reass->frag_n;
+  rmp->is_ip4 = 0;
+
+  /* the address of the message pointer is what gets passed to the queue */
+  vl_msg_api_send_shmem (ctx->q, (u8 *) & rmp);
+
+  return 0;
+}
+
+/**
+ * Handler for the nat_reass_dump API message.
+ *
+ * Resolves the client's input queue and, if there is one, walks the IPv4
+ * and then the IPv6 reassemblies, emitting a details message per entry.
+ */
+static void
+vl_api_nat_reass_dump_t_handler (vl_api_nat_reass_dump_t * mp)
+{
+  nat_api_walk_ctx_t walk_ctx;
+
+  walk_ctx.q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!walk_ctx.q)
+    return;                    /* no queue for this client: nothing to send */
+  walk_ctx.context = mp->context;
+
+  nat_ip4_reass_walk (nat_ip4_reass_walk_api, &walk_ctx);
+  nat_ip6_reass_walk (nat_ip6_reass_walk_api, &walk_ctx);
+}
+
+/**
+ * Render a nat_reass_dump request as a "SCRIPT:" text line; the request
+ * fields are not printed.
+ */
+static void *
+vl_api_nat_reass_dump_t_print (vl_api_nat_reass_dump_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_reass_dump");
+
+  FINISH;
+}
+
 /*************/
 /*** NAT44 ***/
 /*************/
@@ -3406,6 +3547,9 @@ _(NAT_SHOW_CONFIG, nat_show_config)                                     \
 _(NAT_SET_WORKERS, nat_set_workers)                                     \
 _(NAT_WORKER_DUMP, nat_worker_dump)                                     \
 _(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable)                   \
+_(NAT_SET_REASS, nat_set_reass)                                         \
+_(NAT_GET_REASS, nat_get_reass)                                         \
+_(NAT_REASS_DUMP, nat_reass_dump)                                       \
 _(NAT44_ADD_DEL_ADDRESS_RANGE, nat44_add_del_address_range)             \
 _(NAT44_INTERFACE_ADD_DEL_FEATURE, nat44_interface_add_del_feature)     \
 _(NAT44_ADD_DEL_STATIC_MAPPING, nat44_add_del_static_mapping)           \
diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c
new file mode 100644 (file)
index 0000000..239bc70
--- /dev/null
@@ -0,0 +1,739 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin virtual fragmentation reassembly
+ */
+
+#include <vnet/vnet.h>
+#include <nat/nat_reass.h>
+
+nat_reass_main_t nat_reass_main;
+
+/**
+ * Compute the hash bucket count for the configured reassembly limit.
+ *
+ * The limit is divided by NAT_REASS_HT_LOAD_FACTOR and the result is rounded
+ * up to a power of two (at most 1 << 31).
+ *
+ * @param is_ip6 non-zero selects the IPv6 limit, zero the IPv4 limit.
+ */
+static u32
+nat_reass_get_nbuckets (u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  u32 wanted;
+  u8 shift = 0;
+
+  wanted = is_ip6 ?
+    (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR) :
+    (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
+
+  /* smallest shift whose power of two covers the wanted count */
+  while (shift < 31 && (1 << shift) < wanted)
+    shift++;
+
+  return 1 << shift;
+}
+
+/**
+ * Drain the per-reassembly fragment list of an IPv4 reassembly.
+ *
+ * Each cached buffer index is appended to *bi, frag_n is decremented and the
+ * list element is returned to the fragment list pool. No locking is done
+ * here.
+ */
+static_always_inline void
+nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  dlist_elt_t *frag;
+  u32 frag_index;
+
+  for (;;)
+    {
+      frag_index =
+       clib_dlist_remove_head (srm->ip4_frags_list_pool,
+                               reass->frags_per_reass_list_head_index);
+      if (frag_index == ~0)
+       break;                  /* list is empty */
+
+      frag = pool_elt_at_index (srm->ip4_frags_list_pool, frag_index);
+      vec_add1 (*bi, frag->value);
+      reass->frag_n--;
+      pool_put_index (srm->ip4_frags_list_pool, frag_index);
+    }
+}
+
+/**
+ * Drain the per-reassembly fragment list of an IPv6 reassembly.
+ *
+ * Each cached buffer index is appended to *bi, frag_n is decremented and the
+ * list element is returned to the fragment list pool. No locking is done
+ * here.
+ */
+static_always_inline void
+nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  dlist_elt_t *frag;
+  u32 frag_index;
+
+  for (;;)
+    {
+      frag_index =
+       clib_dlist_remove_head (srm->ip6_frags_list_pool,
+                               reass->frags_per_reass_list_head_index);
+      if (frag_index == ~0)
+       break;                  /* list is empty */
+
+      frag = pool_elt_at_index (srm->ip6_frags_list_pool, frag_index);
+      vec_add1 (*bi, frag->value);
+      reass->frag_n--;
+      pool_put_index (srm->ip6_frags_list_pool, frag_index);
+    }
+}
+
+/**
+ * Apply a new virtual reassembly configuration for one address family.
+ *
+ * When max_reass differs from the current value the reassembly pool and the
+ * lookup hash of that family are freed and re-created under the family's
+ * lock; timeout, max_frag and drop_frag are always overwritten.
+ *
+ * NOTE(review): the resize path does not reset the reassembly counter, the
+ * LRU list or the cached fragment lists - verify whether callers guarantee
+ * that no reassembly is in progress when the limit changes.
+ *
+ * @returns always 0.
+ */
+int
+nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
+              u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  u32 nbuckets;
+
+  if (is_ip6)
+    {
+      if (srm->ip6_max_reass != max_reass)
+       {
+         clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+         srm->ip6_max_reass = max_reass;
+         pool_free (srm->ip6_reass_pool);
+         /* size pool and hash from the IPv6 limit (was the IPv4 one) */
+         pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
+         nbuckets = nat_reass_get_nbuckets (1);
+         clib_bihash_free_48_8 (&srm->ip6_reass_hash);
+         clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
+                                nbuckets, nbuckets * 1024);
+
+         clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+       }
+      srm->ip6_timeout = timeout;
+      srm->ip6_max_frag = max_frag;
+      srm->ip6_drop_frag = drop_frag;
+    }
+  else
+    {
+      if (srm->ip4_max_reass != max_reass)
+       {
+         clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+         srm->ip4_max_reass = max_reass;
+         pool_free (srm->ip4_reass_pool);
+         pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
+         nbuckets = nat_reass_get_nbuckets (0);
+         clib_bihash_free_16_8 (&srm->ip4_reass_hash);
+         clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
+                                nbuckets, nbuckets * 1024);
+         clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+       }
+      srm->ip4_timeout = timeout;
+      srm->ip4_max_frag = max_frag;
+      srm->ip4_drop_frag = drop_frag;
+    }
+
+  return 0;
+}
+
+/* Return the configured reassembly timeout for IPv6 (is_ip6 != 0) or IPv4. */
+u32
+nat_reass_get_timeout (u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  return is_ip6 ? srm->ip6_timeout : srm->ip4_timeout;
+}
+
+/* Return the configured reassembly limit for IPv6 (is_ip6 != 0) or IPv4. */
+u16
+nat_reass_get_max_reass (u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  return is_ip6 ? srm->ip6_max_reass : srm->ip4_max_reass;
+}
+
+/* Return the per-reassembly fragment limit for IPv6 (is_ip6 != 0) or IPv4. */
+u8
+nat_reass_get_max_frag (u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  return is_ip6 ? srm->ip6_max_frag : srm->ip4_max_frag;
+}
+
+/* Return the drop_frag setting for IPv6 (is_ip6 != 0) or IPv4. */
+u8
+nat_reass_is_drop_frag (u8 is_ip6)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  return is_ip6 ? srm->ip6_drop_frag : srm->ip4_drop_frag;
+}
+
+/**
+ * Look up an IPv4 reassembly by key.
+ *
+ * @returns the entry if it is in the hash and has not timed out relative to
+ *          now, otherwise 0.
+ */
+static_always_inline nat_reass_ip4_t *
+nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  clib_bihash_kv_16_8_t search_kv, found_kv;
+  nat_reass_ip4_t *hit;
+
+  search_kv.key[0] = k->as_u64[0];
+  search_kv.key[1] = k->as_u64[1];
+
+  if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &search_kv, &found_kv)
+      != 0)
+    return 0;
+
+  hit = pool_elt_at_index (srm->ip4_reass_pool, found_kv.value);
+
+  /* an entry older than the timeout is treated as not found */
+  return (now < hit->last_heard + (f64) srm->ip4_timeout) ? hit : 0;
+}
+
+/**
+ * Find the IPv4 reassembly for (src, dst, frag_id, proto) or create one.
+ *
+ * A live match is returned as-is; with reset_timeout set its timestamp is
+ * refreshed and it moves to the LRU tail. Otherwise a new entry is taken
+ * from the pool, or - once ip4_max_reass entries exist - the LRU head is
+ * recycled if it has timed out. Fragments cached on a recycled entry are
+ * appended to *bi_to_drop.
+ *
+ * @returns the reassembly, or 0 when no slot is free or a hash update fails.
+ */
+nat_reass_ip4_t *
+nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
+                             u16 frag_id, u8 proto, u8 reset_timeout,
+                             u32 ** bi_to_drop)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  nat_reass_ip4_t *reass = 0;
+  nat_reass_ip4_key_t k;
+  f64 now = vlib_time_now (srm->vlib_main);
+  dlist_elt_t *oldest_elt, *elt;
+  dlist_elt_t *per_reass_list_head_elt;
+  u32 oldest_index, elt_index;
+  clib_bihash_kv_16_8_t kv;
+
+  k.src.as_u32 = src.as_u32;
+  k.dst.as_u32 = dst.as_u32;
+  k.frag_id = frag_id;
+  k.proto = proto;
+
+  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+  reass = nat_ip4_reass_lookup (&k, now);
+  if (reass)
+    {
+      if (reset_timeout)
+       {
+         reass->last_heard = now;
+         clib_dlist_remove (srm->ip4_reass_lru_list_pool,
+                            reass->lru_list_index);
+         clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                             srm->ip4_reass_head_index,
+                             reass->lru_list_index);
+       }
+      goto unlock;
+    }
+
+  if (srm->ip4_reass_n >= srm->ip4_max_reass)
+    {
+      /* table is full: try to recycle the least recently used entry */
+      oldest_index =
+       clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
+                               srm->ip4_reass_head_index);
+      ASSERT (oldest_index != ~0);
+      oldest_elt =
+       pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
+      reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
+      if (now < reass->last_heard + (f64) srm->ip4_timeout)
+       {
+         /* even the oldest entry is still live: put it back and give up */
+         clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
+                             srm->ip4_reass_head_index, oldest_index);
+         clib_warning ("no free resassembly slot");
+         reass = 0;
+         goto unlock;
+       }
+
+      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                         srm->ip4_reass_head_index, oldest_index);
+
+      /* Unhash the recycled entry under its own (old) key; the new lookup
+       * key k is only inserted further below. */
+      kv.key[0] = reass->key.as_u64[0];
+      kv.key[1] = reass->key.as_u64[1];
+      if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
+       {
+         reass = 0;
+         goto unlock;
+       }
+
+      /* hand fragments cached for the old flow back to the caller */
+      nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
+    }
+  else
+    {
+      /* fresh entry: allocate it, its LRU element and its fragment list head */
+      pool_get (srm->ip4_reass_pool, reass);
+      pool_get (srm->ip4_reass_lru_list_pool, elt);
+      reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
+      clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
+      elt->value = reass - srm->ip4_reass_pool;
+      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
+                         srm->ip4_reass_head_index, elt_index);
+      pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
+      reass->frags_per_reass_list_head_index =
+       per_reass_list_head_elt - srm->ip4_frags_list_pool;
+      clib_dlist_init (srm->ip4_frags_list_pool,
+                      reass->frags_per_reass_list_head_index);
+      srm->ip4_reass_n++;
+    }
+
+  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
+  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
+  kv.value = reass - srm->ip4_reass_pool;
+  reass->sess_index = (u32) ~ 0;
+  reass->last_heard = now;
+
+  if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
+    {
+      reass = 0;
+      goto unlock;
+    }
+
+unlock:
+  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+  return reass;
+}
+
+/*
+ * Append buffer index bi to the fragment list of an IPv4 reassembly.
+ * Returns -1 without caching when frag_n has reached ip4_max_frag, 0 on
+ * success.
+ */
+int
+nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  dlist_elt_t *elt;
+  u32 elt_index;
+
+  /* the limit check reads frag_n before the lock is taken */
+  if (reass->frag_n >= srm->ip4_max_frag)
+    return -1;
+
+  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+  pool_get (srm->ip4_frags_list_pool, elt);
+  elt_index = elt - srm->ip4_frags_list_pool;
+  clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
+  elt->value = bi;
+  clib_dlist_addtail (srm->ip4_frags_list_pool,
+                     reass->frags_per_reass_list_head_index, elt_index);
+  reass->frag_n++;
+
+  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+
+  return 0;
+}
+
+/*
+ * Locked wrapper around nat_ip4_reass_get_frags_inline(): moves all cached
+ * buffer indexes of the reassembly into the vector *bi.
+ */
+void
+nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
+
+  nat_ip4_reass_get_frags_inline (reass, bi);
+
+  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
+}
+
+/*
+ * Invoke fn on every IPv4 reassembly that has not timed out. The walk ends
+ * early as soon as fn returns non-zero. The reassembly lock is not taken
+ * here.
+ */
+void
+nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
+{
+  nat_reass_ip4_t *reass;
+  nat_reass_main_t *srm = &nat_reass_main;
+  f64 now = vlib_time_now (srm->vlib_main);
+
+  /* *INDENT-OFF* */
+  pool_foreach (reass, srm->ip4_reass_pool,
+  ({
+    /* skip entries whose timeout has already elapsed */
+    if (now < reass->last_heard + (f64) srm->ip4_timeout)
+      {
+        if (fn (reass, ctx))
+          return;
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * Look up an IPv6 reassembly by key (the key's unused word is zeroed first).
+ *
+ * @returns the entry if it is in the hash and has not timed out relative to
+ *          now, otherwise 0.
+ */
+static_always_inline nat_reass_ip6_t *
+nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  clib_bihash_kv_48_8_t search_kv, found_kv;
+  nat_reass_ip6_t *hit;
+  int word;
+
+  k->unused = 0;
+  for (word = 0; word < 6; word++)
+    search_kv.key[word] = k->as_u64[word];
+
+  if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &search_kv, &found_kv)
+      != 0)
+    return 0;
+
+  hit = pool_elt_at_index (srm->ip6_reass_pool, found_kv.value);
+
+  /* an entry older than the timeout is treated as not found */
+  return (now < hit->last_heard + (f64) srm->ip6_timeout) ? hit : 0;
+}
+
+/**
+ * Find the IPv6 reassembly for (src, dst, frag_id, proto) or create one.
+ *
+ * A live match is returned as-is; with reset_timeout set its timestamp is
+ * refreshed and it moves to the LRU tail. Otherwise a new entry is taken
+ * from the pool, or - once ip6_max_reass entries exist - the LRU head is
+ * recycled if it has timed out. Fragments cached on a recycled entry are
+ * appended to *bi_to_drop.
+ *
+ * @returns the reassembly, or 0 when no slot is free or a hash update fails.
+ */
+nat_reass_ip6_t *
+nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
+                             u32 frag_id, u8 proto, u8 reset_timeout,
+                             u32 ** bi_to_drop)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  nat_reass_ip6_t *reass = 0;
+  nat_reass_ip6_key_t k;
+  f64 now = vlib_time_now (srm->vlib_main);
+  dlist_elt_t *oldest_elt, *elt;
+  dlist_elt_t *per_reass_list_head_elt;
+  u32 oldest_index, elt_index;
+  clib_bihash_kv_48_8_t kv;
+
+  k.src.as_u64[0] = src.as_u64[0];
+  k.src.as_u64[1] = src.as_u64[1];
+  k.dst.as_u64[0] = dst.as_u64[0];
+  k.dst.as_u64[1] = dst.as_u64[1];
+  k.frag_id = frag_id;
+  k.proto = proto;
+  k.unused = 0;
+
+  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+  reass = nat_ip6_reass_lookup (&k, now);
+  if (reass)
+    {
+      if (reset_timeout)
+       {
+         reass->last_heard = now;
+         clib_dlist_remove (srm->ip6_reass_lru_list_pool,
+                            reass->lru_list_index);
+         clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                             srm->ip6_reass_head_index,
+                             reass->lru_list_index);
+       }
+      goto unlock;
+    }
+
+  if (srm->ip6_reass_n >= srm->ip6_max_reass)
+    {
+      /* table is full: try to recycle the least recently used entry */
+      oldest_index =
+       clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
+                               srm->ip6_reass_head_index);
+      ASSERT (oldest_index != ~0);
+      /* the index came from the IPv6 LRU list, so read the IPv6 pool */
+      oldest_elt =
+       pool_elt_at_index (srm->ip6_reass_lru_list_pool, oldest_index);
+      reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
+      if (now < reass->last_heard + (f64) srm->ip6_timeout)
+       {
+         /* even the oldest entry is still live: put it back and give up */
+         clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
+                             srm->ip6_reass_head_index, oldest_index);
+         clib_warning ("no free resassembly slot");
+         reass = 0;
+         goto unlock;
+       }
+
+      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                         srm->ip6_reass_head_index, oldest_index);
+
+      /* Unhash the recycled entry under its own (old) key, copying all six
+       * key words; the new lookup key k is only inserted further below. */
+      kv.key[0] = reass->key.as_u64[0];
+      kv.key[1] = reass->key.as_u64[1];
+      kv.key[2] = reass->key.as_u64[2];
+      kv.key[3] = reass->key.as_u64[3];
+      kv.key[4] = reass->key.as_u64[4];
+      kv.key[5] = reass->key.as_u64[5];
+      if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
+       {
+         reass = 0;
+         goto unlock;
+       }
+
+      /* hand fragments cached for the old flow back to the caller */
+      nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
+    }
+  else
+    {
+      /* fresh entry: allocate it, its LRU element and its fragment list head */
+      pool_get (srm->ip6_reass_pool, reass);
+      pool_get (srm->ip6_reass_lru_list_pool, elt);
+      reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
+      clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
+      elt->value = reass - srm->ip6_reass_pool;
+      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
+                         srm->ip6_reass_head_index, elt_index);
+      pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
+      reass->frags_per_reass_list_head_index =
+       per_reass_list_head_elt - srm->ip6_frags_list_pool;
+      clib_dlist_init (srm->ip6_frags_list_pool,
+                      reass->frags_per_reass_list_head_index);
+      srm->ip6_reass_n++;
+    }
+
+  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
+  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
+  reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
+  reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
+  reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
+  reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
+  kv.value = reass - srm->ip6_reass_pool;
+  reass->sess_index = (u32) ~ 0;
+  reass->last_heard = now;
+
+  if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
+    {
+      reass = 0;
+      goto unlock;
+    }
+
+unlock:
+  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+  return reass;
+}
+
+/*
+ * Append buffer index bi to the fragment list of an IPv6 reassembly.
+ * Returns -1 without caching when frag_n has reached ip6_max_frag, 0 on
+ * success.
+ */
+int
+nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  dlist_elt_t *elt;
+  u32 elt_index;
+
+  /* the limit check reads frag_n before the lock is taken */
+  if (reass->frag_n >= srm->ip6_max_frag)
+    return -1;
+
+  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+  pool_get (srm->ip6_frags_list_pool, elt);
+  elt_index = elt - srm->ip6_frags_list_pool;
+  clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
+  elt->value = bi;
+  clib_dlist_addtail (srm->ip6_frags_list_pool,
+                     reass->frags_per_reass_list_head_index, elt_index);
+  reass->frag_n++;
+
+  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+
+  return 0;
+}
+
+/*
+ * Locked wrapper around nat_ip6_reass_get_frags_inline(): moves all cached
+ * buffer indexes of the reassembly into the vector *bi.
+ */
+void
+nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+
+  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
+
+  nat_ip6_reass_get_frags_inline (reass, bi);
+
+  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
+}
+
+/*
+ * Invoke fn on every IPv6 reassembly that has not timed out. The walk ends
+ * early as soon as fn returns non-zero. The reassembly lock is not taken
+ * here.
+ */
+void
+nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
+{
+  nat_reass_ip6_t *reass;
+  nat_reass_main_t *srm = &nat_reass_main;
+  f64 now = vlib_time_now (srm->vlib_main);
+
+  /* *INDENT-OFF* */
+  pool_foreach (reass, srm->ip6_reass_pool,
+  ({
+    /* expiry of IPv6 entries is governed by the IPv6 timeout */
+    if (now < reass->last_heard + (f64) srm->ip6_timeout)
+      {
+        if (fn (reass, ctx))
+          return;
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Set up the virtual reassembly state for both address families: default
+ * configuration, optional spinlock, reassembly pool, lookup hash and the
+ * head element of the LRU list. Always returns 0 (no error).
+ */
+clib_error_t *
+nat_reass_init (vlib_main_t * vm)
+{
+  nat_reass_main_t *srm = &nat_reass_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  clib_error_t *error = 0;
+  dlist_elt_t *head;
+  u32 nbuckets, head_index;
+
+  srm->vlib_main = vm;
+  srm->vnet_main = vnet_get_main ();
+
+  /* IPv4 */
+  srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
+  srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
+  srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
+  srm->ip4_drop_frag = 0;
+  srm->ip4_reass_n = 0;
+
+  /* the lock is only initialised when there is more than one vlib main */
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&srm->ip4_reass_lock);
+
+  pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
+
+  nbuckets = nat_reass_get_nbuckets (0);
+  clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
+                        nbuckets * 1024);
+
+  /* allocate the LRU list head */
+  pool_get (srm->ip4_reass_lru_list_pool, head);
+  srm->ip4_reass_head_index = head_index =
+    head - srm->ip4_reass_lru_list_pool;
+  clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
+
+  /* IPv6 */
+  srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
+  srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
+  srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
+  srm->ip6_drop_frag = 0;
+  srm->ip6_reass_n = 0;
+
+  /* the lock is only initialised when there is more than one vlib main */
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&srm->ip6_reass_lock);
+
+  pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
+
+  nbuckets = nat_reass_get_nbuckets (1);
+  clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
+                        nbuckets * 1024);
+
+  /* allocate the LRU list head */
+  pool_get (srm->ip6_reass_lru_list_pool, head);
+  srm->ip6_reass_head_index = head_index =
+    head - srm->ip6_reass_lru_list_pool;
+  clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
+
+  return error;
+}
+
+/*
+ * CLI handler for "nat virtual-reassembly".
+ *
+ * Parses the optional max-reassemblies / max-fragments / timeout values, the
+ * enable|disable switch and the ip4|ip6 selector (ip4 when omitted). Values
+ * that are absent - or given as 0 - keep their current setting. Limits that
+ * do not fit the u16 / u8 parameters of nat_reass_set() are rejected instead
+ * of being silently truncated.
+ */
+static clib_error_t *
+nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                     vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 timeout = 0, max_reass = 0, max_frag = 0;
+  u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "max-reassemblies %u", &max_reass))
+       ;
+      else if (unformat (line_input, "max-fragments %u", &max_frag))
+       ;
+      else if (unformat (line_input, "timeout %u", &timeout))
+       ;
+      else if (unformat (line_input, "enable"))
+       drop_frag = 0;
+      else if (unformat (line_input, "disable"))
+       drop_frag = 1;
+      else if (unformat (line_input, "ip4"))
+       is_ip6 = 0;
+      else if (unformat (line_input, "ip6"))
+       is_ip6 = 1;
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
+    }
+
+  /* nat_reass_set() takes max_reass as u16 and max_frag as u8 */
+  if (max_reass > 0xffff)
+    {
+      error = clib_error_return (0, "max-reassemblies %u out of range "
+                                "(maximum 65535)", max_reass);
+      goto done;
+    }
+  if (max_frag > 0xff)
+    {
+      error = clib_error_return (0, "max-fragments %u out of range "
+                                "(maximum 255)", max_frag);
+      goto done;
+    }
+
+  /* anything left at its "unset" marker keeps the current configuration */
+  if (!timeout)
+    timeout = nat_reass_get_timeout (is_ip6);
+  if (!max_reass)
+    max_reass = nat_reass_get_max_reass (is_ip6);
+  if (!max_frag)
+    max_frag = nat_reass_get_max_frag (is_ip6);
+  if (drop_frag == (u8) ~ 0)
+    drop_frag = nat_reass_is_drop_frag (is_ip6);
+
+  rv =
+    nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
+                  is_ip6);
+  if (rv)
+    {
+      error = clib_error_return (0, "nat_set_reass return %d", rv);
+      goto done;
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*
+ * Walk callback for the show command: print one IPv4 reassembly. ctx is the
+ * vlib_main_t used for CLI output. Always returns 0.
+ */
+static int
+nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
+{
+  vlib_main_t *vm = ctx;
+
+  /* the fragment id is converted from a network-order 16-bit value */
+  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%04x cached %u",
+                  format_ip4_address, &reass->key.src,
+                  format_ip4_address, &reass->key.dst,
+                  reass->key.proto,
+                  clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n);
+
+  return 0;
+}
+
+/*
+ * Walk callback for the show command: print one IPv6 reassembly. ctx is the
+ * vlib_main_t used for CLI output. Always returns 0.
+ */
+static int
+nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
+{
+  vlib_main_t *vm = ctx;
+
+  /* the fragment id is converted from a network-order 32-bit value */
+  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%08x cached %u",
+                  format_ip6_address, &reass->key.src,
+                  format_ip6_address, &reass->key.dst,
+                  reass->key.proto,
+                  clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "show nat virtual-reassembly": prints the configuration
+ * and the live reassemblies, first for IPv4 and then for IPv6. A non-zero
+ * drop_frag setting is reported as DISABLED.
+ */
+static clib_error_t *
+show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
+                  nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
+  vlib_cli_output (vm, " max-reasssemblies %u", nat_reass_get_max_reass (0));
+  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
+  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
+  vlib_cli_output (vm, " reassemblies:");
+  nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
+
+  vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
+                  nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
+  vlib_cli_output (vm, " max-reasssemblies %u", nat_reass_get_max_reass (1));
+  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
+  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
+  vlib_cli_output (vm, " reassemblies:");
+  nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI: configure virtual fragmentation reassembly for one address family. */
+VLIB_CLI_COMMAND (nat_reass_command, static) =
+{
+  .path = "nat virtual-reassembly",
+  .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
+                "[max-fragments <n>] [timeout <sec>] [enable|disable]",
+  .function = nat_reass_command_fn,
+};
+
+/* CLI: show reassembly configuration and live reassemblies (IPv4 and IPv6). */
+VLIB_CLI_COMMAND (show_nat_reass_command, static) =
+{
+  .path = "show nat virtual-reassembly",
+  .short_help = "show nat virtual-reassembly",
+  .function = show_nat_reass_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h
new file mode 100644 (file)
index 0000000..ae14a96
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin virtual fragmentation reassembly
+ */
+#ifndef __included_nat_reass_h__
+#define __included_nat_reass_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/dlist.h>
+
+/* default reassembly timeout (the CLI documents the unit as seconds) */
+#define NAT_REASS_TIMEOUT_DEFAULT 2
+/* default maximum number of concurrent reassemblies */
+#define NAT_MAX_REASS_DEAFULT 1024
+/* default maximum number of cached fragments per reassembly */
+#define NAT_MAX_FRAG_DEFAULT 5
+/* divisor applied to max_reass when sizing the hash buckets */
+#define NAT_REASS_HT_LOAD_FACTOR (0.75)
+
+/* IPv4 reassembly lookup key: 16 bytes, also addressable as two u64 words. */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ip4_address_t src;
+      ip4_address_t dst;
+      /* align by making this 4 octets even though it's a 2 octet field */
+      u32 frag_id;
+      /* align by making this 4 octets even though it's a 1 octet field */
+      u32 proto;
+    };
+    u64 as_u64[2];
+  };
+} nat_reass_ip4_key_t;
+
+/* *INDENT-OFF* */
+/* State of one IPv4 virtual reassembly. */
+typedef CLIB_PACKED(struct
+{
+  /* lookup key this entry is hashed under */
+  nat_reass_ip4_key_t key;
+  /* index of this entry's element in the LRU list pool */
+  u32 lru_list_index;
+  /* set to ~0 when the entry is created or recycled */
+  u32 sess_index;
+  /* time of creation or of the last timeout reset */
+  f64 last_heard;
+  /* index of the head element of this entry's cached fragment list */
+  u32 frags_per_reass_list_head_index;
+  /* number of fragments currently cached */
+  u8 frag_n;
+}) nat_reass_ip4_t;
+/* *INDENT-ON* */
+
+/* IPv6 reassembly lookup key: 48 bytes, also addressable as six u64 words. */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ip6_address_t src;
+      ip6_address_t dst;
+      u32 frag_id;
+      /* align by making this 4 octets even though it's a 1 octet field */
+      u32 proto;
+      /* padding up to 48 bytes; zeroed before the key is used */
+      u64 unused;
+    };
+    u64 as_u64[6];
+  };
+} nat_reass_ip6_key_t;
+
+/* *INDENT-OFF* */
+/* State of one IPv6 virtual reassembly. */
+typedef CLIB_PACKED(struct
+{
+  /* lookup key this entry is hashed under */
+  nat_reass_ip6_key_t key;
+  /* index of this entry's element in the LRU list pool */
+  u32 lru_list_index;
+  /* set to ~0 when the entry is created or recycled */
+  u32 sess_index;
+  /* time of creation or of the last timeout reset */
+  f64 last_heard;
+  /* index of the head element of this entry's cached fragment list */
+  u32 frags_per_reass_list_head_index;
+  /* number of fragments currently cached */
+  u8 frag_n;
+}) nat_reass_ip6_t;
+/* *INDENT-ON* */
+
+/* Global state of NAT virtual fragmentation reassembly (both families). */
+typedef struct
+{
+  /* IPv4 config */
+  u32 ip4_timeout;
+  u16 ip4_max_reass;
+  u8 ip4_max_frag;
+  u8 ip4_drop_frag;
+
+  /* IPv6 config */
+  u32 ip6_timeout;
+  u16 ip6_max_reass;
+  u8 ip6_max_frag;
+  u8 ip6_drop_frag;
+
+  /* IPv4 runtime */
+  nat_reass_ip4_t *ip4_reass_pool;
+  /* key -> index into ip4_reass_pool */
+  clib_bihash_16_8_t ip4_reass_hash;
+  /* LRU list elements; each value is an index into ip4_reass_pool */
+  dlist_elt_t *ip4_reass_lru_list_pool;
+  /* per-reassembly fragment lists; each value is a buffer index */
+  dlist_elt_t *ip4_frags_list_pool;
+  /* index of the LRU list head element */
+  u32 ip4_reass_head_index;
+  /* number of reassembly entries created so far */
+  u16 ip4_reass_n;
+  /* only initialised when more than one vlib main exists */
+  clib_spinlock_t ip4_reass_lock;
+
+  /* IPv6 runtime */
+  nat_reass_ip6_t *ip6_reass_pool;
+  /* key -> index into ip6_reass_pool */
+  clib_bihash_48_8_t ip6_reass_hash;
+  /* LRU list elements; each value is an index into ip6_reass_pool */
+  dlist_elt_t *ip6_reass_lru_list_pool;
+  /* per-reassembly fragment lists; each value is a buffer index */
+  dlist_elt_t *ip6_frags_list_pool;
+  /* index of the LRU list head element */
+  u32 ip6_reass_head_index;
+  /* number of reassembly entries created so far */
+  u16 ip6_reass_n;
+  /* only initialised when more than one vlib main exists */
+  clib_spinlock_t ip6_reass_lock;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+} nat_reass_main_t;
+
+/**
+ * @brief Set NAT virtual fragmentation reassembly configuration.
+ *
+ * @param timeout   Reassembly timeout.
+ * @param max_reass Maximum number of concurrent reassemblies.
+ * @param max_frag  Maximum number of fragments per reassembly.
+ * @param drop_frag If zero translate fragments, otherwise drop fragments.
+ * @param is_ip6    1 if IPv6, 0 if IPv4.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
+                  u8 is_ip6);
+
+/**
+ * @brief Get reassembly timeout.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns reassembly timeout.
+ */
+u32 nat_reass_get_timeout (u8 is_ip6);
+
+/**
+ * @brief Get maximum number of concurrent reassemblies.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns maximum number of concurrent reassemblies.
+ */
+u16 nat_reass_get_max_reass (u8 is_ip6);
+
+/**
+ * @brief Get maximum number of fragments per reassembly.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns maximum number of fragments per reassembly.
+ */
+u8 nat_reass_get_max_frag (u8 is_ip6);
+
+/**
+ * @brief Get status of virtual fragmentation reassembly.
+ *
+ * @param is_ip6 1 if IPv6, 0 if IPv4.
+ *
+ * @returns zero if translate fragments, non-zero value if drop fragments.
+ */
+u8 nat_reass_is_drop_frag (u8 is_ip6);
+
+/**
+ * @brief Initialize NAT virtual fragmentation reassembly.
+ *
+ * @param vm vlib main.
+ *
+ * @return error code.
+ */
+clib_error_t *nat_reass_init (vlib_main_t * vm);
+
+/**
+ * @brief Find or create reassembly.
+ *
+ * @param src Source IPv4 address.
+ * @param dst Destination IPv4 address.
+ * @param frag_id Fragment ID.
+ * @param proto L4 protocol.
+ * @param reset_timeout If non-zero value reset timeout.
+ * @param bi_to_drop Fragments to drop.
+ *
+ * @returns Reassembly data or 0 on failure.
+ */
+nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src,
+                                              ip4_address_t dst,
+                                              u16 frag_id, u8 proto,
+                                              u8 reset_timeout,
+                                              u32 ** bi_to_drop);
+/**
+ * @brief Cache fragment.
+ *
+ * @param reass Reassembly data.
+ * @param bi Buffer index.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ip4_reass_add_fragment (nat_reass_ip4_t * reass, u32 bi);
+
+/**
+ * @brief Get cached fragments.
+ *
+ * @param reass Reassembly data.
+ * @param bi Vector of buffer indexes.
+ */
+void nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi);
+
+/**
+ * @brief Callback function invoked when walking IPv4 reassemblies; a
+ * non-zero return value stops the walk.
+ */
+typedef int (*nat_ip4_reass_walk_fn_t) (nat_reass_ip4_t * reass, void *ctx);
+
+/**
+ * @brief Walk IPv4 reassemblies.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx);
+
+/**
+ * @brief Find or create reassembly.
+ *
+ * @param src Source IPv6 address.
+ * @param dst Destination IPv6 address.
+ * @param frag_id Fragment ID.
+ * @param proto L4 protocol.
+ * @param reset_timeout If non-zero value reset timeout.
+ * @param bi_to_drop Fragments to drop.
+ *
+ * @returns Reassembly data or 0 on failure.
+ */
+nat_reass_ip6_t *nat_ip6_reass_find_or_create (ip6_address_t src,
+                                              ip6_address_t dst,
+                                              u32 frag_id, u8 proto,
+                                              u8 reset_timeout,
+                                              u32 ** bi_to_drop);
+/**
+ * @brief Cache fragment.
+ *
+ * @param reass Reassembly data.
+ * @param bi Buffer index.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ip6_reass_add_fragment (nat_reass_ip6_t * reass, u32 bi);
+
+/**
+ * @brief Get cached fragments.
+ *
+ * @param reass Reassembly data.
+ * @param bi Vector of buffer indexes.
+ */
+void nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi);
+
+/**
+ * @brief Callback function invoked when walking IPv6 reassemblies; a
+ * non-zero return value stops the walk.
+ */
+typedef int (*nat_ip6_reass_walk_fn_t) (nat_reass_ip6_t * reass, void *ctx);
+
+/**
+ * @brief Walk IPv6 reassemblies.
+ *
+ * @param fn The function to invoke on each entry visited.
+ * @param ctx A context passed in the visit function.
+ */
+void nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx);
+
+#endif /* __included_nat_reass_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index f250136..489afad 100755 (executable)
@@ -25,6 +25,7 @@
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
+#include <nat/nat_reass.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -78,17 +79,40 @@ static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
   return s;
 }
 
+typedef struct {
+  u32 sw_if_index;  /* RX software interface index of the traced buffer */
+  u32 next_index;   /* next node index selected for the buffer */
+  u8 cached;        /* non-zero when the fragment was cached, not forwarded */
+} nat44_out2in_reass_trace_t;
+
+static u8 * format_nat44_out2in_reass_trace (u8 * s, va_list * args)
+{
+  /* vm and node are pulled off the list only to reach the trace record */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat44_out2in_reass_trace_t * rec;
+  char * status;
+  rec = va_arg (*args, nat44_out2in_reass_trace_t *);
+  status = rec->cached ? "cached" : "translated";
+  return format (s, "NAT44_OUT2IN_REASS: sw_if_index %d, next index %d, status %s",
+                 rec->sw_if_index, rec->next_index, status);
+}
+
 vlib_node_registration_t snat_out2in_node;
 vlib_node_registration_t snat_out2in_fast_node;
 vlib_node_registration_t snat_out2in_worker_handoff_node;
 vlib_node_registration_t snat_det_out2in_node;
+vlib_node_registration_t nat44_out2in_reass_node;
 
 #define foreach_snat_out2in_error                       \
 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
 _(OUT2IN_PACKETS, "Good out2in packets processed")      \
 _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")                     \
-_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")
+_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "Drop fragment")                       \
+_(MAX_REASS, "Maximum reassemblies exceeded")           \
+_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
 
 typedef enum {
 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
@@ -107,6 +131,7 @@ typedef enum {
   SNAT_OUT2IN_NEXT_DROP,
   SNAT_OUT2IN_NEXT_LOOKUP,
   SNAT_OUT2IN_NEXT_ICMP_ERROR,
+  SNAT_OUT2IN_NEXT_REASS,
   SNAT_OUT2IN_N_NEXT,
 } snat_out2in_next_t;
 
@@ -139,6 +164,7 @@ create_session_for_static_mapping (snat_main_t *sm,
   dlist_elt_t * per_user_translation_list_elt;
   dlist_elt_t * per_user_list_head_elt;
   ip4_header_t *ip0;
+  udp_header_t *udp0;
 
   if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
     {
@@ -147,6 +173,7 @@ create_session_for_static_mapping (snat_main_t *sm,
     }
 
   ip0 = vlib_buffer_get_current (b0);
+  udp0 = ip4_next_header (ip0);
 
   user_key.addr = in2out.addr;
   user_key.fib_index = in2out.fib_index;
@@ -188,7 +215,8 @@ create_session_for_static_mapping (snat_main_t *sm,
 
   s->outside_address_index = ~0;
   s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
-  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
+  s->ext_host_addr.as_u32 = ip0->src_address.as_u32;
+  s->ext_host_port = udp0->src_port;
   u->nstaticsessions++;
 
   /* Create list elts */
@@ -1033,6 +1061,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace0;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip0)))
+            {
+              next0 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace0;
+            }
+
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
@@ -1188,6 +1222,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace1;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip1)))
+            {
+              next1 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace1;
+            }
+
           key1.addr = ip1->dst_address;
           key1.port = udp1->dst_port;
           key1.protocol = proto1;
@@ -1379,6 +1419,12 @@ snat_out2in_node_fn (vlib_main_t * vm,
               goto trace00;
             }
 
+          if (PREDICT_FALSE (ip4_is_fragment (ip0)))
+            {
+              next0 = SNAT_OUT2IN_NEXT_REASS;
+              goto trace00;
+            }
+
           key0.addr = ip0->dst_address;
           key0.port = udp0->dst_port;
           key0.protocol = proto0;
@@ -1530,10 +1576,294 @@ VLIB_REGISTER_NODE (snat_out2in_node) = {
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
 
+static uword /* NAT44 out2in translation of fragmented IPv4 packets */
+nat44_out2in_reass_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  snat_out2in_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t *sm = &snat_main;
+  f64 now = vlib_time_now (vm);
+  u32 thread_index = vlib_get_thread_index ();
+  snat_main_per_thread_data_t *per_thread_data =
+    &sm->per_thread_data[thread_index];
+  u32 *fragments_to_drop = 0; /* filled by find_or_create, dropped at the end */
+  u32 *fragments_to_loopback = 0; /* cached fragments to re-run through the loop */
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+          u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
+         vlib_buffer_t *b0;
+          u32 next0;
+          u8 cached0 = 0; /* set when b0 is stored in the reassembly, not sent */
+          ip4_header_t *ip0;
+          nat_reass_ip4_t *reass0;
+          udp_header_t * udp0;
+          tcp_header_t * tcp0;
+          snat_session_key_t key0, sm0;
+          clib_bihash_kv_8_8_t kv0, value0;
+          snat_session_t * s0 = 0;
+          u16 old_port0, new_port0;
+          ip_csum_t sum0;
+
+          /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+
+          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+          rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                               sw_if_index0);
+
+          if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
+            { /* NOTE(review): argument 0 presumably selects IPv4 - confirm */
+              next0 = SNAT_OUT2IN_NEXT_DROP;
+              b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
+              goto trace0;
+            }
+
+          ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
+          udp0 = ip4_next_header (ip0);
+          tcp0 = (tcp_header_t *) udp0;
+          proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+          reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
+                                                 ip0->dst_address,
+                                                 ip0->fragment_id,
+                                                 ip0->protocol,
+                                                 1, /* NOTE(review): presumably reset_timeout - confirm */
+                                                 &fragments_to_drop);
+
+          if (PREDICT_FALSE (!reass0))
+            {
+              next0 = SNAT_OUT2IN_NEXT_DROP;
+              b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
+              goto trace0;
+            }
+
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            { /* first fragment: find or create the session from its ports */
+              key0.addr = ip0->dst_address;
+              key0.port = udp0->dst_port;
+              key0.protocol = proto0;
+              key0.fib_index = rx_fib_index0;
+              kv0.key = key0.as_u64;
+
+              if (clib_bihash_search_8_8 (&per_thread_data->out2in, &kv0, &value0))
+                {
+                  /* Try to match static mapping by external address and port,
+                     destination address and port in packet */
+                  if (snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+                      /*
+                       * Send DHCP packets to the ipv4 stack, or we won't
+                       * be able to use dhcp client on the outside interface
+                       */
+                      if (proto0 != SNAT_PROTOCOL_UDP
+                          || (udp0->dst_port
+                              != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
+
+                        next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+
+                  /* Create session initiated by host from external network */
+                  s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+                                                         thread_index);
+                  if (!s0)
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+                      next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+                  reass0->sess_index = s0 - per_thread_data->sessions; /* for later fragments */
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (per_thread_data->sessions,
+                                          value0.value);
+                  reass0->sess_index = value0.value;
+                }
+              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); /* fragments cached so far */
+            }
+          else
+            {
+              if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
+                { /* session not known yet: cache this fragment */
+                  if (nat_ip4_reass_add_fragment (reass0, bi0))
+                    {
+                      b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
+                      next0 = SNAT_OUT2IN_NEXT_DROP;
+                      goto trace0;
+                    }
+                  cached0 = 1;
+                  goto trace0;
+                }
+              s0 = pool_elt_at_index (per_thread_data->sessions,
+                                      reass0->sess_index);
+            }
+
+          old_addr0 = ip0->dst_address.as_u32; /* rewrite the destination address */
+          ip0->dst_address = s0->in2out.addr;
+          new_addr0 = ip0->dst_address.as_u32;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                 ip4_header_t,
+                                 dst_address /* changed member */);
+          ip0->checksum = ip_csum_fold (sum0);
+
+          if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
+            { /* ports are rewritten in the first fragment only */
+              if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+                {
+                  old_port0 = tcp0->dst_port;
+                  tcp0->dst_port = s0->in2out.port;
+                  new_port0 = tcp0->dst_port;
+
+                  sum0 = tcp0->checksum;
+                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+                                         ip4_header_t,
+                                         dst_address /* changed member */);
+
+                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
+                                         ip4_header_t /* cheat */,
+                                         length /* changed member */);
+                  tcp0->checksum = ip_csum_fold(sum0);
+                }
+              else
+                {
+                  old_port0 = udp0->dst_port;
+                  udp0->dst_port = s0->in2out.port;
+                  udp0->checksum = 0; /* checksum is cleared rather than updated */
+                }
+            }
+
+          /* Accounting */
+          s0->last_heard = now;
+          s0->total_pkts++;
+          s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+          /* Per-user LRU list maintenance for dynamic translation */
+          if (!snat_is_session_static (s0))
+            {
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+                                 s0->per_user_index);
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+                                  s0->per_user_list_head_index,
+                                  s0->per_user_index);
+            }
+
+        trace0:
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat44_out2in_reass_trace_t *t =
+                 vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->cached = cached0;
+              t->sw_if_index = sw_if_index0;
+              t->next_index = next0;
+            }
+
+          if (cached0)
+            { /* undo the speculative enqueue; the buffer stays cached */
+              n_left_to_next++;
+              to_next--;
+            }
+          else
+            {
+              pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+              /* verify speculative enqueue, maybe switch current next frame */
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               bi0, next0);
+            }
+
+         if (n_left_from == 0 && vec_len (fragments_to_loopback))
+           { /* input exhausted: refill the frame with pending loopback fragments */
+             from = vlib_frame_vector_args (frame);
+             u32 len = vec_len (fragments_to_loopback);
+             if (len <= VLIB_FRAME_SIZE)
+               {
+                 clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
+                 n_left_from = len;
+                 vec_reset_length (fragments_to_loopback);
+               }
+             else
+               { /* more than one frame's worth: take the tail, keep the rest */
+                 clib_memcpy (from,
+                               fragments_to_loopback + (len - VLIB_FRAME_SIZE),
+                               sizeof (u32) * VLIB_FRAME_SIZE);
+                 n_left_from = VLIB_FRAME_SIZE;
+                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+               }
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat44_out2in_reass_node.index,
+                               SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+                               pkts_processed);
+
+  nat_send_all_to_node (vm, fragments_to_drop, node,
+                        &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
+                        SNAT_OUT2IN_NEXT_DROP);
+
+  vec_free (fragments_to_drop);
+  vec_free (fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
+  /* node identity and entry points */
+  .name = "nat44-out2in-reass",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = nat44_out2in_reass_node_fn,
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat44_out2in_reass_trace,
+
+  /* reuses the snat_out2in error strings */
+  .error_strings = snat_out2in_error_strings,
+  .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+  /* next-node dispositions, one entry per SNAT_OUT2IN_NEXT_* value */
+  .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+  .next_nodes = {
+    [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
+    [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+  },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_out2in_reass_node,
+                              nat44_out2in_reass_node_fn);
+
 /**************************/
 /*** deterministic mode ***/
 /**************************/
@@ -2017,6 +2347,7 @@ VLIB_REGISTER_NODE (snat_det_out2in_node) = {
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn);
@@ -2509,6 +2840,7 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);
index e420baf..3c002bb 100644 (file)
@@ -3,16 +3,19 @@
 import socket
 import unittest
 import struct
+import StringIO
+import random
 
 from framework import VppTestCase, VppTestRunner, running_extended_tests
 from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto
 from scapy.layers.inet import IP, TCP, UDP, ICMP
 from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror
 from scapy.layers.inet6 import IPv6, ICMPv6EchoRequest, ICMPv6EchoReply
-from scapy.layers.inet6 import ICMPv6DestUnreach, IPerror6
+from scapy.layers.inet6 import ICMPv6DestUnreach, IPerror6, IPv6ExtHdrFragment
 from scapy.layers.l2 import Ether, ARP, GRE
 from scapy.data import IP_PROTOS
-from scapy.packet import bind_layers
+from scapy.packet import bind_layers, Raw
+from scapy.all import fragment6
 from util import ppp
 from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder
 from time import sleep
@@ -464,6 +467,121 @@ class MethodHolder(VppTestCase):
                                       "(inside network):", packet))
                 raise
 
+    def create_stream_frag(self, src_if, dst, sport, dport, data):
+        """
+        Build three IPv4 fragments carrying a single TCP segment
+
+        :param src_if: Source interface
+        :param dst: Destination IPv4 address
+        :param sport: Source TCP port
+        :param dport: Destination TCP port
+        :param data: Payload data
+        :returns: Fragments
+        """
+        frag_id = random.randint(0, 65535)
+        # build the unfragmented packet once to learn the TCP checksum
+        whole = (IP(src=src_if.remote_ip4, dst=dst) /
+                 TCP(sport=sport, dport=dport) /
+                 Raw(data))
+        whole = whole.__class__(str(whole))
+        chksum = whole['TCP'].chksum
+        # fragment offsets (frag=) are expressed in 8-byte units
+        first = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                 IP(src=src_if.remote_ip4, dst=dst, flags="MF", frag=0,
+                    id=frag_id) /
+                 TCP(sport=sport, dport=dport, chksum=chksum) /
+                 Raw(data[0:4]))
+        middle = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                  IP(src=src_if.remote_ip4, dst=dst, flags="MF", frag=3,
+                     id=frag_id, proto=IP_PROTOS.tcp) /
+                  Raw(data[4:20]))
+        # the last fragment is sent without the MF flag
+        last = (Ether(src=src_if.remote_mac, dst=src_if.local_mac) /
+                IP(src=src_if.remote_ip4, dst=dst, frag=5, id=frag_id,
+                   proto=IP_PROTOS.tcp) /
+                Raw(data[20:]))
+        return [first, middle, last]
+
+    def create_stream_frag_ip6(self, src_if, dst, sport, dport, data,
+                               pref=None, plen=0, frag_size=128):
+        """
+        Create fragmented IPv6 packet stream
+
+        :param src_if: Source interface
+        :param dst: Destination IPv4 address
+        :param sport: Source TCP port
+        :param dport: Destination TCP port
+        :param data: Payload data
+        :param pref: NAT64 prefix
+        :param plen: NAT64 prefix length
+        :param frag_size: size of fragments
+        :returns: Fragments
+        """
+        # without an explicit prefix, dst is appended to 64:ff9b::
+        dst_ip6 = ('64:ff9b::' + dst if pref is None
+                   else self.compose_ip6(dst, pref, plen))
+
+        # one full packet, split into fragments below
+        ether = Ether(dst=src_if.local_mac, src=src_if.remote_mac)
+        ip6 = IPv6(src=src_if.remote_ip6, dst=dst_ip6)
+        frag_hdr = IPv6ExtHdrFragment(id=random.randint(0, 65535))
+        tcp = TCP(sport=sport, dport=dport)
+        unfragmented = ether / ip6 / frag_hdr / tcp / Raw(data)
+
+        return fragment6(unfragmented, frag_size)
+
+    def reass_frags_and_verify(self, frags, src, dst):
+        """
+        Check every fragment, then stitch the payloads back together
+
+        :param frags: Captured fragments
+        :param src: Source IPv4 address to verify
+        :param dst: Destination IPv4 address to verify
+
+        :returns: Reassembled IPv4 packet
+        """
+        payload = StringIO.StringIO()
+        for pkt in frags:
+            self.assertEqual(pkt[IP].src, src)
+            self.assertEqual(pkt[IP].dst, dst)
+            self.check_ip_checksum(pkt)
+            # the fragment offset is counted in 8-byte units
+            payload.seek(pkt[IP].frag * 8)
+            payload.write(pkt[IP].payload)
+        first = frags[0][IP]
+        ip = IP(src=first.src, dst=first.dst, proto=first.proto)
+        if ip.proto == IP_PROTOS.tcp:
+            pkt = (ip / TCP(payload.getvalue()))
+            self.check_tcp_checksum(pkt)
+        elif ip.proto == IP_PROTOS.udp:
+            pkt = (ip / UDP(payload.getvalue()))
+        return pkt
+
+    def reass_frags_and_verify_ip6(self, frags, src, dst):
+        """
+        Check every IPv6 fragment, then stitch the payloads back together
+
+        :param frags: Captured fragments
+        :param src: Source IPv6 address to verify
+        :param dst: Destination IPv6 address to verify
+        :returns: Reassembled IPv6 packet
+        """
+        payload = StringIO.StringIO()
+        for pkt in frags:
+            self.assertEqual(pkt[IPv6].src, src)
+            self.assertEqual(pkt[IPv6].dst, dst)
+            frag_hdr = pkt[IPv6ExtHdrFragment]
+            payload.seek(frag_hdr.offset * 8)
+            payload.write(frag_hdr.payload)
+        ip = IPv6(src=frags[0][IPv6].src, dst=frags[0][IPv6].dst,
+                  nh=frags[0][IPv6ExtHdrFragment].nh)
+        if ip.nh == IP_PROTOS.tcp:
+            pkt = (ip / TCP(payload.getvalue()))
+            self.check_tcp_checksum(pkt)
+        elif ip.nh == IP_PROTOS.udp:
+            pkt = (ip / UDP(payload.getvalue()))
+        return pkt
+
     def verify_ipfix_nat44_ses(self, data):
         """
         Verify IPFIX NAT44 session create/delete event
@@ -586,6 +704,8 @@ class TestNAT44(MethodHolder):
             cls.pg4._remote_ip4 = cls.pg9._remote_hosts[0]._ip4 = "10.0.0.2"
             cls.pg9.resolve_arp()
 
+            random.seed()
+
         except Exception:
             super(TestNAT44, cls).tearDownClass()
             raise
@@ -671,6 +791,9 @@ class TestNAT44(MethodHolder):
                                                   addr.ip_address,
                                                   is_add=0)
 
+        self.vapi.nat_set_reass()
+        self.vapi.nat_set_reass(is_ip6=1)
+
     def nat44_add_static_mapping(self, local_ip, external_ip='0.0.0.0',
                                  local_port=0, external_port=0, vrf_id=0,
                                  is_add=1, external_sw_if_index=0xFFFFFFFF,
@@ -2480,10 +2603,164 @@ class TestNAT44(MethodHolder):
         sessions = self.vapi.nat44_user_session_dump(self.pg0.remote_ip4n, 0)
         self.assertEqual(nsessions - len(sessions), 2)
 
+    def test_set_get_reass(self):
+        """ NAT44 set/get virtual fragmentation reassembly """
+        before = self.vapi.nat_get_reass()
+        timeout = before.ip4_timeout + 5
+        max_reass = before.ip4_max_reass * 2
+        max_frag = before.ip4_max_frag * 2
+        self.vapi.nat_set_reass(timeout=timeout, max_reass=max_reass,
+                                max_frag=max_frag)
+
+        after = self.vapi.nat_get_reass()
+        self.assertEqual(timeout, after.ip4_timeout)
+        self.assertEqual(max_reass, after.ip4_max_reass)
+        self.assertEqual(max_frag, after.ip4_max_frag)
+
+        self.vapi.nat_set_reass(drop_frag=1)
+        self.assertTrue(self.vapi.nat_get_reass().ip4_drop_frag)
+
+    def test_frag_in_order(self):
+        """ NAT44 translate fragments arriving in order """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+
+        payload = "A" * 4 + "B" * 16 + "C" * 3
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        # remember how many reassemblies exist before any traffic is sent
+        reass_before = len(self.vapi.nat_reass_dump())
+
+        # in2out: sent on pg0, captured on pg1 with the NAT source address
+        fragments = self.create_stream_frag(self.pg0,
+                                            self.pg1.remote_ip4,
+                                            self.tcp_port_in,
+                                            20,
+                                            payload)
+        self.pg0.add_stream(fragments)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg1.get_capture(len(fragments))
+        whole = self.reass_frags_and_verify(captured,
+                                            self.nat_addr,
+                                            self.pg1.remote_ip4)
+        self.assertEqual(whole[TCP].dport, 20)
+        self.assertNotEqual(whole[TCP].sport, self.tcp_port_in)
+        self.tcp_port_out = whole[TCP].sport
+        self.assertEqual(payload, whole[Raw].load)
+
+        # out2in: sent on pg1 to the NAT address, captured on pg0
+        fragments = self.create_stream_frag(self.pg1,
+                                            self.nat_addr,
+                                            20,
+                                            self.tcp_port_out,
+                                            payload)
+        self.pg1.add_stream(fragments)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        captured = self.pg0.get_capture(len(fragments))
+        whole = self.reass_frags_and_verify(captured,
+                                            self.pg1.remote_ip4,
+                                            self.pg0.remote_ip4)
+        self.assertEqual(whole[TCP].sport, 20)
+        self.assertEqual(whole[TCP].dport, self.tcp_port_in)
+        self.assertEqual(payload, whole[Raw].load)
+
+        # the two streams are expected to add two reassemblies in total
+        reass_after = len(self.vapi.nat_reass_dump())
+
+        self.assertEqual(reass_after - reass_before, 2)
+
+    def test_reass_hairpinning(self):
+        """ NAT44 fragments hairpinning """
+        server = self.pg0.remote_hosts[1]
+        host_in_port = random.randint(1025, 65535)
+        server_in_port = random.randint(1025, 65535)
+        server_out_port = random.randint(1025, 65535)
+        data = "A" * 4 + "B" * 16 + "C" * 3
+
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+        # add static mapping for server
+        self.nat44_add_static_mapping(server.ip4, self.nat_addr,
+                                      server_in_port, server_out_port,
+                                      proto=IP_PROTOS.tcp)
+
+        # send fragments from the inside host to the server's external
+        # address and port; the server is a pg0 remote host as well
+        pkts = self.create_stream_frag(self.pg0,
+                                       self.nat_addr,
+                                       host_in_port,
+                                       server_out_port,
+                                       data)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # hairpinned traffic is captured on the inside interface again
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        server.ip4)
+        self.assertNotEqual(p[TCP].sport, host_in_port)
+        self.assertEqual(p[TCP].dport, server_in_port)
+        self.assertEqual(data, p[Raw].load)
+
+    def test_frag_out_of_order(self):
+        """ NAT44 translate fragments arriving out of order """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+
+        data = "A" * 4 + "B" * 16 + "C" * 3
+        self.tcp_port_in = random.randint(1025, 65535)  # inside source port
+
+        # in2out
+        pkts = self.create_stream_frag(self.pg0,
+                                       self.pg1.remote_ip4,
+                                       self.tcp_port_in,
+                                       20,
+                                       data)
+        pkts.reverse()  # send the last fragment first
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg1.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        self.pg1.remote_ip4)
+        self.assertEqual(p[TCP].dport, 20)
+        self.assertNotEqual(p[TCP].sport, self.tcp_port_in)
+        self.tcp_port_out = p[TCP].sport  # reused as destination port below
+        self.assertEqual(data, p[Raw].load)
+
+        # out2in
+        pkts = self.create_stream_frag(self.pg1,
+                                       self.nat_addr,
+                                       20,
+                                       self.tcp_port_out,
+                                       data)
+        pkts.reverse()  # send the last fragment first
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.pg1.remote_ip4,
+                                        self.pg0.remote_ip4)
+        self.assertEqual(p[TCP].sport, 20)
+        self.assertEqual(p[TCP].dport, self.tcp_port_in)
+        self.assertEqual(data, p[Raw].load)
+
     def tearDown(self):
         super(TestNAT44, self).tearDown()
         if not self.vpp_dead:
             self.logger.info(self.vapi.cli("show nat44 verbose"))
+            self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
             self.clear_nat44()
 
 
@@ -3928,6 +4205,138 @@ class TestNAT64(MethodHolder):
             self.logger.error(ppp("Unexpected or invalid packet:", p))
             raise
 
+    def test_frag_in_order(self):
+        """ NAT64 translate fragments arriving in order """
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        reass = self.vapi.nat_reass_dump()  # snapshot before traffic
+        reass_n_start = len(reass)
+
+        # in2out: send IPv6 fragments on pg0, reassemble what pg1 captures
+        data = 'a' * 200
+        pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4,
+                                           self.tcp_port_in, 20, data)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg1.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        self.pg1.remote_ip4)
+        self.assertEqual(p[TCP].dport, 20)
+        self.assertNotEqual(p[TCP].sport, self.tcp_port_in)
+        self.tcp_port_out = p[TCP].sport  # post-NAT sport, reused by out2in
+        self.assertEqual(data, p[Raw].load)
+
+        # out2in: send fragments on pg1 to nat_addr, reassemble pg0's capture
+        data = "A" * 4 + "b" * 16 + "C" * 3
+        pkts = self.create_stream_frag(self.pg1,
+                                       self.nat_addr,
+                                       20,
+                                       self.tcp_port_out,
+                                       data)
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg0.get_capture(len(pkts))
+        src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96)
+        p = self.reass_frags_and_verify_ip6(frags, src, self.pg0.remote_ip6)
+        self.assertEqual(p[TCP].sport, 20)
+        self.assertEqual(p[TCP].dport, self.tcp_port_in)
+        self.assertEqual(data, p[Raw].load)
+
+        reass = self.vapi.nat_reass_dump()  # snapshot after traffic
+        reass_n_end = len(reass)
+
+        self.assertEqual(reass_n_end - reass_n_start, 2)
+
+    def test_reass_hairpinning(self):
+        """ NAT64 fragments hairpinning """
+        data = 'a' * 200
+        client = self.pg0.remote_hosts[0]  # NOTE(review): unused below
+        server = self.pg0.remote_hosts[1]
+        server_in_port = random.randint(1025, 65535)
+        server_out_port = random.randint(1025, 65535)
+        client_in_port = random.randint(1025, 65535)
+        ip = IPv6(src=''.join(['64:ff9b::', self.nat_addr]))
+        nat_addr_ip6 = ip.src  # nat_addr under the 64:ff9b:: prefix
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        # add static BIB entry mapping the server's in/out TCP ports
+        self.vapi.nat64_add_del_static_bib(server.ip6n,
+                                           self.nat_addr_n,
+                                           server_in_port,
+                                           server_out_port,
+                                           IP_PROTOS.tcp)
+
+        # send fragments on pg0 to the server's static mapping (hairpinning)
+        pkts = self.create_stream_frag_ip6(self.pg0,
+                                           self.nat_addr,
+                                           client_in_port,
+                                           server_out_port,
+                                           data)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg0.get_capture(len(pkts))
+        p = self.reass_frags_and_verify_ip6(frags, nat_addr_ip6, server.ip6)
+        self.assertNotEqual(p[TCP].sport, client_in_port)
+        self.assertEqual(p[TCP].dport, server_in_port)
+        self.assertEqual(data, p[Raw].load)
+
+    def test_frag_out_of_order(self):
+        """ NAT64 translate fragments arriving out of order """
+        self.tcp_port_in = random.randint(1025, 65535)
+
+        self.vapi.nat64_add_del_pool_addr_range(self.nat_addr_n,
+                                                self.nat_addr_n)
+        self.vapi.nat64_add_del_interface(self.pg0.sw_if_index)
+        self.vapi.nat64_add_del_interface(self.pg1.sw_if_index, is_inside=0)
+
+        # in2out: IPv6 fragments are sent on pg0 in reversed order
+        data = 'a' * 200
+        pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4,
+                                           self.tcp_port_in, 20, data)
+        pkts.reverse()
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg1.get_capture(len(pkts))
+        p = self.reass_frags_and_verify(frags,
+                                        self.nat_addr,
+                                        self.pg1.remote_ip4)
+        self.assertEqual(p[TCP].dport, 20)
+        self.assertNotEqual(p[TCP].sport, self.tcp_port_in)
+        self.tcp_port_out = p[TCP].sport  # post-NAT sport, reused by out2in
+        self.assertEqual(data, p[Raw].load)
+
+        # out2in: fragments are sent on pg1 to nat_addr in reversed order
+        data = "A" * 4 + "B" * 16 + "C" * 3
+        pkts = self.create_stream_frag(self.pg1,
+                                       self.nat_addr,
+                                       20,
+                                       self.tcp_port_out,
+                                       data)
+        pkts.reverse()
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        frags = self.pg0.get_capture(len(pkts))
+        src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96)
+        p = self.reass_frags_and_verify_ip6(frags, src, self.pg0.remote_ip6)
+        self.assertEqual(p[TCP].sport, 20)
+        self.assertEqual(p[TCP].dport, self.tcp_port_in)
+        self.assertEqual(data, p[Raw].load)
+
     def nat64_get_ses_num(self):
         """
         Return number of active NAT64 sessions.
@@ -4006,6 +4415,7 @@ class TestNAT64(MethodHolder):
             self.logger.info(self.vapi.cli("show nat64 prefix"))
             self.logger.info(self.vapi.cli("show nat64 bib all"))
             self.logger.info(self.vapi.cli("show nat64 session table all"))
+            self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
             self.clear_nat64()
 
 
index 31d7ac4..63f9383 100644 (file)
@@ -1408,6 +1408,43 @@ class VppPapiProvider(object):
              'vrf_id': vrf_id,
              'is_in': is_in})
 
+    def nat_set_reass(
+            self,
+            timeout=2,
+            max_reass=1024,
+            max_frag=5,
+            drop_frag=0,
+            is_ip6=0):
+        """Set NAT virtual fragmentation reassembly configuration
+
+        :param timeout: reassembly timeout in seconds (Default 2)
+        :param max_reass: maximum concurrent reassemblies (Default 1024)
+        :param max_frag: maximum fragments per reassembly (Default 5)
+        :param drop_frag: if 0 translate fragments, otherwise drop fragments
+        :param is_ip6: 1 if IPv6, 0 if IPv4 (Default 0)
+        """
+        return self.api(
+            self.papi.nat_set_reass,
+            {'timeout': timeout,
+             'max_reass': max_reass,
+             'max_frag': max_frag,
+             'drop_frag': drop_frag,
+             'is_ip6': is_ip6})
+
+    def nat_get_reass(self):
+        """Get NAT virtual fragmentation reassembly configuration
+
+        :return: NAT virtual fragmentation reassembly configuration
+        """
+        return self.api(self.papi.nat_get_reass, {})  # no request args
+
+    def nat_reass_dump(self):
+        """Dump NAT virtual fragmentation reassemblies
+
+        :return: Dumped NAT virtual fragmentation reassembly entries
+        """
+        return self.api(self.papi.nat_reass_dump, {})  # no request args
+
     def nat_det_add_del_map(
             self,
             in_addr,