nat: use SVR 63/23963/8
author Klement Sekera <ksekera@cisco.com>
Thu, 10 Oct 2019 09:46:06 +0000 (09:46 +0000)
committer Ole Trøan <otroan@employees.org>
Fri, 3 Jan 2020 10:10:15 +0000 (10:10 +0000)
Remove NAT's implementation of shallow virtual reassembly with
corresponding CLIs, APIs & tests. Replace with standalone shallow
virtual reassembly provided by ipX-sv-reass* nodes.

Type: refactor
Change-Id: I7e6c7487a5a500d591f6871474a359e0993e59b6
Signed-off-by: Klement Sekera <ksekera@cisco.com>
39 files changed:
src/plugins/map/ip4_map_t.c
src/plugins/map/ip6_map_t.c
src/plugins/nat/CMakeLists.txt
src/plugins/nat/dslite_in2out.c
src/plugins/nat/dslite_out2in.c
src/plugins/nat/in2out.c
src/plugins/nat/in2out_ed.c
src/plugins/nat/nat.api
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat44_classify.c
src/plugins/nat/nat44_hairpinning.c
src/plugins/nat/nat44_handoff.c
src/plugins/nat/nat64.c
src/plugins/nat/nat64.h
src/plugins/nat/nat64_in2out.c
src/plugins/nat/nat64_out2in.c
src/plugins/nat/nat66.c
src/plugins/nat/nat66_in2out.c
src/plugins/nat/nat66_out2in.c
src/plugins/nat/nat_api.c
src/plugins/nat/nat_det_in2out.c
src/plugins/nat/nat_det_out2in.c
src/plugins/nat/nat_format.c
src/plugins/nat/nat_inlines.h
src/plugins/nat/nat_reass.c [deleted file]
src/plugins/nat/nat_reass.h [deleted file]
src/plugins/nat/out2in.c
src/plugins/nat/out2in_ed.c
src/plugins/nat/test/test_nat.py
src/vnet/buffer.h
src/vnet/ip/ip4_packet.h
src/vnet/ip/ip4_to_ip6.h
src/vnet/ip/ip6_packet.h
src/vnet/ip/ip6_to_ip4.h
src/vnet/ip/reass/ip4_sv_reass.c
src/vnet/ip/reass/ip4_sv_reass.h
src/vnet/ip/reass/ip6_sv_reass.c
src/vnet/vxlan-gbp/vxlan_gbp.h

index c254efc..a02b554 100644 (file)
@@ -69,7 +69,8 @@ typedef struct
 } icmp_to_icmp6_ctx_t;
 
 static int
-ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+ip4_to_ip6_set_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+                       ip6_header_t * ip6, void *arg)
 {
   icmp_to_icmp6_ctx_t *ctx = arg;
 
@@ -83,8 +84,8 @@ ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
 }
 
 static int
-ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6,
-                             void *arg)
+ip4_to_ip6_set_inner_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+                             ip6_header_t * ip6, void *arg)
 {
   icmp_to_icmp6_ctx_t *ctx = arg;
 
index 7999507..95104dc 100644 (file)
@@ -145,9 +145,11 @@ ip6_map_t_icmp (vlib_main_t * vm,
          d0 =
            pool_elt_at_index (map_main.domains,
                               vnet_buffer (p0)->map_t.map_domain_index);
-         ctx0.sender_port = ip6_get_port (ip60, 0, p0->current_length);
          ctx0.d = d0;
-         if (ctx0.sender_port == 0)
+         ctx0.sender_port = 0;
+         if (!ip6_get_port
+             (vm, p0, ip60, p0->current_length, NULL, &ctx0.sender_port,
+              NULL, NULL, NULL, NULL))
            {
              // In case of 1:1 mapping, we don't care about the port
              if (!(d0->ea_bits_len == 0 && d0->rules))
@@ -157,9 +159,8 @@ ip6_map_t_icmp (vlib_main_t * vm,
                }
            }
 
-         if (icmp6_to_icmp
-             (p0, ip6_to_ip4_set_icmp_cb, &ctx0,
-              ip6_to_ip4_set_inner_icmp_cb, &ctx0))
+         if (icmp6_to_icmp (vm, p0, ip6_to_ip4_set_icmp_cb, &ctx0,
+                            ip6_to_ip4_set_inner_icmp_cb, &ctx0))
            {
              error0 = MAP_ERROR_ICMP;
              goto err0;
@@ -200,7 +201,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
  * Translate IPv6 fragmented packet to IPv4.
  */
 always_inline int
-map_ip6_to_ip4_fragmented (vlib_buffer_t * p)
+map_ip6_to_ip4_fragmented (vlib_main_t * vm, vlib_buffer_t * p)
 {
   ip6_header_t *ip6;
   ip6_frag_hdr_t *frag;
@@ -214,7 +215,7 @@ map_ip6_to_ip4_fragmented (vlib_buffer_t * p)
   ip6 = vlib_buffer_get_current (p);
 
   if (ip6_parse
-      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+      (vm, p, ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
     return -1;
 
   frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
@@ -230,7 +231,7 @@ map_ip6_to_ip4_fragmented (vlib_buffer_t * p)
 
   ip4->ip_version_and_header_length =
     IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
+  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
   ip4->length =
     u16_net_add (ip6->payload_length,
                 sizeof (*ip4) - l4_offset + sizeof (*ip6));
@@ -276,7 +277,7 @@ ip6_map_t_fragmented (vlib_main_t * vm,
          next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
          p0 = vlib_get_buffer (vm, pi0);
 
-         if (map_ip6_to_ip4_fragmented (p0))
+         if (map_ip6_to_ip4_fragmented (vm, p0))
            {
              p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
              next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP;
@@ -306,7 +307,8 @@ ip6_map_t_fragmented (vlib_main_t * vm,
  * Translate IPv6 UDP/TCP packet to IPv4.
  */
 always_inline int
-map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum)
+map_ip6_to_ip4_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p,
+                       bool udp_checksum)
 {
   map_main_t *mm = &map_main;
   ip6_header_t *ip6;
@@ -323,7 +325,7 @@ map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum)
   ip6 = vlib_buffer_get_current (p);
 
   if (ip6_parse
-      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+      (vm, p, ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
     return -1;
 
   if (l4_protocol == IP_PROTOCOL_TCP)
@@ -370,7 +372,7 @@ map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum)
 
   ip4->ip_version_and_header_length =
     IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
+  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
   ip4->length =
     u16_net_add (ip6->payload_length,
                 sizeof (*ip4) + sizeof (*ip6) - l4_offset);
@@ -429,7 +431,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
 
          p0 = vlib_get_buffer (vm, pi0);
 
-         if (map_ip6_to_ip4_tcp_udp (p0, true))
+         if (map_ip6_to_ip4_tcp_udp (vm, p0, true))
            {
              p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
              next0 = IP6_MAPT_TCP_UDP_NEXT_DROP;
@@ -512,7 +514,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
 
          if (PREDICT_FALSE
-             (ip6_parse (ip60, p0->current_length,
+             (ip6_parse (vm, p0, ip60, p0->current_length,
                          &(vnet_buffer (p0)->map_t.v6.l4_protocol),
                          &(vnet_buffer (p0)->map_t.v6.l4_offset),
                          &(vnet_buffer (p0)->map_t.v6.frag_offset))))
index 4f6ed67..372bbd6 100644 (file)
@@ -23,7 +23,6 @@ add_vpp_plugin(nat
   nat_det.c
   nat_det_in2out.c
   nat_det_out2in.c
-  nat_reass.c
   nat_dpo.c
   nat44_cli.c
   nat44_handoff.c
index 2a8b548..4494a77 100644 (file)
@@ -182,7 +182,7 @@ dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
   u16 old_id, new_id;
   ip_csum_t sum;
 
-  if (icmp_is_error_message (icmp))
+  if (icmp_type_is_error_message (icmp->type))
     {
       n = DSLITE_IN2OUT_NEXT_DROP;
       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
index 18f9a57..265d79f 100644 (file)
@@ -46,7 +46,8 @@ dslite_icmp_out2in (dslite_main_t * dm, ip4_header_t * ip4,
 
   echo = (icmp_echo_header_t *) (icmp + 1);
 
-  if (icmp_is_error_message (icmp) || (icmp->type != ICMP4_echo_reply))
+  if (icmp_type_is_error_message (icmp->type)
+      || (icmp->type != ICMP4_echo_reply))
     {
       n = DSLITE_OUT2IN_NEXT_DROP;
       *error = DSLITE_ERROR_BAD_ICMP_TYPE;
index 6cb111c..7eaaab2 100755 (executable)
@@ -27,7 +27,6 @@
 #include <vnet/udp/udp.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat44_inlines.h>
 #include <nat/nat_syslog.h>
@@ -84,8 +83,6 @@ _(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "no translation")                     \
 _(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
 _(DROP_FRAGMENT, "drop fragment")                       \
-_(MAX_REASS, "maximum reassemblies exceeded")           \
-_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
 _(TCP_PACKETS, "TCP packets")                           \
 _(UDP_PACKETS, "UDP packets")                           \
 _(ICMP_PACKETS, "ICMP packets")                         \
@@ -114,7 +111,6 @@ typedef enum
   SNAT_IN2OUT_NEXT_DROP,
   SNAT_IN2OUT_NEXT_ICMP_ERROR,
   SNAT_IN2OUT_NEXT_SLOW_PATH,
-  SNAT_IN2OUT_NEXT_REASS,
   SNAT_IN2OUT_N_NEXT,
 } snat_in2out_next_t;
 
@@ -255,7 +251,6 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0,
   snat_session_t *s = 0;
   clib_bihash_kv_8_8_t kv0;
   snat_session_key_t key1;
-  udp_header_t *udp0 = ip4_next_header (ip0);
   u8 is_sm = 0;
   nat_outside_fib_t *outside_fib;
   fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
@@ -357,7 +352,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0,
       break;
     }
   s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
-  s->ext_host_port = udp0->dst_port;
+  s->ext_host_port = vnet_buffer (b0)->ip.reass.l4_dst_port;
   *sessionp = s;
 
   /* Add to translation hashes */
@@ -401,7 +396,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0,
 
 #ifndef CLIB_MARCH_VARIANT
 static_always_inline
-  snat_in2out_error_t icmp_get_key (ip4_header_t * ip0,
+  snat_in2out_error_t icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0,
                                    snat_session_key_t * p_key0)
 {
   icmp46_header_t *icmp0;
@@ -414,11 +409,12 @@ static_always_inline
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
     {
       key0.protocol = SNAT_PROTOCOL_ICMP;
       key0.addr = ip0->src_address;
-      key0.port = echo0->identifier;
+      key0.port = vnet_buffer (b)->ip.reass.l4_src_port;       // TODO fixme should this be dst port?
     }
   else
     {
@@ -466,7 +462,6 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node,
                        snat_session_key_t * p_value,
                        u8 * p_dont_translate, void *d, void *e)
 {
-  icmp46_header_t *icmp0;
   u32 sw_if_index0;
   u32 rx_fib_index0;
   snat_session_key_t key0;
@@ -476,11 +471,10 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node,
   u32 next0 = ~0;
   int err;
 
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-  err = icmp_get_key (ip0, &key0);
+  err = icmp_get_key (b0, ip0, &key0);
   if (err != -1)
     {
       b0->error = node->errors[err];
@@ -519,7 +513,9 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node,
            }
        }
 
-      if (PREDICT_FALSE (icmp_is_error_message (icmp0)))
+      if (PREDICT_FALSE
+         (icmp_type_is_error_message
+          (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)))
        {
          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
          next0 = SNAT_IN2OUT_NEXT_DROP;
@@ -540,9 +536,13 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node,
     }
   else
     {
-      if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request &&
-                        icmp0->type != ICMP4_echo_reply &&
-                        !icmp_is_error_message (icmp0)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_request
+          && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                          reass.icmp_type_or_tcp_flags)))
        {
          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
          next0 = SNAT_IN2OUT_NEXT_DROP;
@@ -585,7 +585,6 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node,
                        snat_session_key_t * p_value,
                        u8 * p_dont_translate, void *d, void *e)
 {
-  icmp46_header_t *icmp0;
   u32 sw_if_index0;
   u32 rx_fib_index0;
   snat_session_key_t key0;
@@ -595,11 +594,10 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node,
   u32 next0 = ~0;
   int err;
 
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-  err = icmp_get_key (ip0, &key0);
+  err = icmp_get_key (b0, ip0, &key0);
   if (err != -1)
     {
       b0->error = node->errors[err];
@@ -619,7 +617,8 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node,
          goto out;
        }
 
-      if (icmp_is_error_message (icmp0))
+      if (icmp_type_is_error_message
+         (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
        {
          next0 = SNAT_IN2OUT_NEXT_DROP;
          goto out;
@@ -630,9 +629,12 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node,
       goto out;
     }
 
-  if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request &&
-                    (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
-                    !icmp_is_error_message (icmp0)))
+  if (PREDICT_FALSE
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request
+       && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply || !is_addr_only)
+       && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                      reass.icmp_type_or_tcp_flags)))
     {
       b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
       next0 = SNAT_IN2OUT_NEXT_DROP;
@@ -706,84 +708,90 @@ icmp_in2out (snat_main_t * sm,
                         src_address /* changed member */ );
   ip0->checksum = ip_csum_fold (sum0);
 
-  if (icmp0->checksum == 0)
-    icmp0->checksum = 0xffff;
-
-  if (!icmp_is_error_message (icmp0))
-    {
-      new_id0 = sm0.port;
-      if (PREDICT_FALSE (new_id0 != echo0->identifier))
-       {
-         old_id0 = echo0->identifier;
-         new_id0 = sm0.port;
-         echo0->identifier = new_id0;
-
-         sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
-                                identifier);
-         icmp0->checksum = ip_csum_fold (sum0);
-       }
-    }
-  else
+  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
     {
-      inner_ip0 = (ip4_header_t *) (echo0 + 1);
-      l4_header = ip4_next_header (inner_ip0);
+      if (icmp0->checksum == 0)
+       icmp0->checksum = 0xffff;
 
-      if (!ip4_header_checksum_is_valid (inner_ip0))
+      if (!icmp_type_is_error_message (icmp0->type))
        {
-         next0 = SNAT_IN2OUT_NEXT_DROP;
-         goto out;
+         new_id0 = sm0.port;
+         if (PREDICT_FALSE (new_id0 != echo0->identifier))
+           {
+             old_id0 = echo0->identifier;
+             new_id0 = sm0.port;
+             echo0->identifier = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 =
+               ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+                               identifier);
+             icmp0->checksum = ip_csum_fold (sum0);
+           }
        }
-
-      /* update inner destination IP address */
-      old_addr0 = inner_ip0->dst_address.as_u32;
-      inner_ip0->dst_address = sm0.addr;
-      new_addr0 = inner_ip0->dst_address.as_u32;
-      sum0 = icmp0->checksum;
-      sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                            dst_address /* changed member */ );
-      icmp0->checksum = ip_csum_fold (sum0);
-
-      /* update inner IP header checksum */
-      old_checksum0 = inner_ip0->checksum;
-      sum0 = inner_ip0->checksum;
-      sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                            dst_address /* changed member */ );
-      inner_ip0->checksum = ip_csum_fold (sum0);
-      new_checksum0 = inner_ip0->checksum;
-      sum0 = icmp0->checksum;
-      sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
-                            checksum);
-      icmp0->checksum = ip_csum_fold (sum0);
-
-      switch (protocol)
+      else
        {
-       case SNAT_PROTOCOL_ICMP:
-         inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+         inner_ip0 = (ip4_header_t *) (echo0 + 1);
+         l4_header = ip4_next_header (inner_ip0);
 
-         old_id0 = inner_echo0->identifier;
-         new_id0 = sm0.port;
-         inner_echo0->identifier = new_id0;
+         if (!ip4_header_checksum_is_valid (inner_ip0))
+           {
+             next0 = SNAT_IN2OUT_NEXT_DROP;
+             goto out;
+           }
 
+         /* update inner destination IP address */
+         old_addr0 = inner_ip0->dst_address.as_u32;
+         inner_ip0->dst_address = sm0.addr;
+         new_addr0 = inner_ip0->dst_address.as_u32;
          sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
-                                identifier);
+         sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                dst_address /* changed member */ );
          icmp0->checksum = ip_csum_fold (sum0);
-         break;
-       case SNAT_PROTOCOL_UDP:
-       case SNAT_PROTOCOL_TCP:
-         old_id0 = ((tcp_udp_header_t *) l4_header)->dst_port;
-         new_id0 = sm0.port;
-         ((tcp_udp_header_t *) l4_header)->dst_port = new_id0;
 
+         /* update inner IP header checksum */
+         old_checksum0 = inner_ip0->checksum;
+         sum0 = inner_ip0->checksum;
+         sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                dst_address /* changed member */ );
+         inner_ip0->checksum = ip_csum_fold (sum0);
+         new_checksum0 = inner_ip0->checksum;
          sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
-                                dst_port);
+         sum0 =
+           ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
+                           checksum);
          icmp0->checksum = ip_csum_fold (sum0);
-         break;
-       default:
-         ASSERT (0);
+
+         switch (protocol)
+           {
+           case SNAT_PROTOCOL_ICMP:
+             inner_icmp0 = (icmp46_header_t *) l4_header;
+             inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+
+             old_id0 = inner_echo0->identifier;
+             new_id0 = sm0.port;
+             inner_echo0->identifier = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 =
+               ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+                               identifier);
+             icmp0->checksum = ip_csum_fold (sum0);
+             break;
+           case SNAT_PROTOCOL_UDP:
+           case SNAT_PROTOCOL_TCP:
+             old_id0 = ((tcp_udp_header_t *) l4_header)->dst_port;
+             new_id0 = sm0.port;
+             ((tcp_udp_header_t *) l4_header)->dst_port = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
+                                    dst_port);
+             icmp0->checksum = ip_csum_fold (sum0);
+             break;
+           default:
+             ASSERT (0);
+           }
        }
     }
 
@@ -939,7 +947,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          b1 = vlib_get_buffer (vm, bi1);
 
          if (is_output_feature)
-           iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
 
          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
                                  iph_offset0);
@@ -999,13 +1007,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace00;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = SNAT_IN2OUT_NEXT_REASS;
-                 fragments++;
-                 goto trace00;
-               }
-
              if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                {
                  next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
@@ -1014,7 +1015,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          key0.addr = ip0->src_address;
-         key0.port = udp0->src_port;
+         key0.port = vnet_buffer (b0)->ip.reass.l4_src_port;
          key0.protocol = proto0;
          key0.fib_index = rx_fib_index0;
 
@@ -1029,13 +1030,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  if (is_output_feature)
                    {
-                     if (PREDICT_FALSE (nat_not_translate_output_feature (sm,
-                                                                          ip0,
-                                                                          proto0,
-                                                                          udp0->src_port,
-                                                                          udp0->dst_port,
-                                                                          thread_index,
-                                                                          sw_if_index0)))
+                     if (PREDICT_FALSE
+                         (nat_not_translate_output_feature
+                          (sm, ip0, proto0,
+                           vnet_buffer (b0)->ip.reass.l4_src_port,
+                           vnet_buffer (b0)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index0)))
                        goto trace00;
 
                      /*
@@ -1045,7 +1045,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto0 == SNAT_PROTOCOL_UDP
-                          && (udp0->dst_port ==
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace00;
@@ -1092,34 +1092,42 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                                 src_address /* changed member */ );
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->src_port;
-         new_port0 = udp0->src_port = s0->out2in.port;
 
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-             sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             mss_clamping (sm, tcp0, &sum0);
-             tcp0->checksum = ip_csum_fold (sum0);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp0->checksum))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = udp0->checksum;
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
+                 mss_clamping (sm, tcp0, &sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+               {
+                 if (PREDICT_FALSE (udp0->checksum))
+                   {
+                     old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
+                     new_port0 = udp0->src_port = s0->out2in.port;
+                     sum0 = udp0->checksum;
+                     sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address      /* changed member */
+                       );
+                     sum0 =
+                       ip_csum_update (sum0, old_port0, new_port0,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp0->checksum = ip_csum_fold (sum0);
+                   }
                }
              udp_packets++;
            }
@@ -1149,7 +1157,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          pkts_processed += next0 == SNAT_IN2OUT_NEXT_LOOKUP;
 
          if (is_output_feature)
-           iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
+           iph_offset1 = vnet_buffer (b1)->ip.reass.save_rewrite_length;
 
          ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
                                  iph_offset1);
@@ -1207,13 +1215,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace01;
                }
 
-             if (ip4_is_fragment (ip1))
-               {
-                 next1 = SNAT_IN2OUT_NEXT_REASS;
-                 fragments++;
-                 goto trace01;
-               }
-
              if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
                {
                  next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
@@ -1222,7 +1223,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          key1.addr = ip1->src_address;
-         key1.port = udp1->src_port;
+         key1.port = vnet_buffer (b1)->ip.reass.l4_src_port;
          key1.protocol = proto1;
          key1.fib_index = rx_fib_index1;
 
@@ -1237,13 +1238,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  if (is_output_feature)
                    {
-                     if (PREDICT_FALSE (nat_not_translate_output_feature (sm,
-                                                                          ip1,
-                                                                          proto1,
-                                                                          udp1->src_port,
-                                                                          udp1->dst_port,
-                                                                          thread_index,
-                                                                          sw_if_index1)))
+                     if (PREDICT_FALSE
+                         (nat_not_translate_output_feature
+                          (sm, ip1, proto1,
+                           vnet_buffer (b1)->ip.reass.l4_src_port,
+                           vnet_buffer (b1)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index1)))
                        goto trace01;
 
                      /*
@@ -1253,7 +1253,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto1 == SNAT_PROTOCOL_UDP
-                          && (udp1->dst_port ==
+                          && (vnet_buffer (b1)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace01;
@@ -1300,34 +1300,41 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                                 src_address /* changed member */ );
          ip1->checksum = ip_csum_fold (sum1);
 
-         old_port1 = udp1->src_port;
-         new_port1 = udp1->src_port = s1->out2in.port;
-
          if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
            {
-             sum1 = tcp1->checksum;
-             sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-             sum1 = ip_csum_update (sum1, old_port1, new_port1,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             mss_clamping (sm, tcp1, &sum1);
-             tcp1->checksum = ip_csum_fold (sum1);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp1->checksum))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum1 = udp1->checksum;
+                 old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port;
+                 new_port1 = udp1->src_port = s1->out2in.port;
+                 sum1 = tcp1->checksum;
                  sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
                  sum1 = ip_csum_update (sum1, old_port1, new_port1,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp1->checksum = ip_csum_fold (sum1);
+                 mss_clamping (sm, tcp1, &sum1);
+                 tcp1->checksum = ip_csum_fold (sum1);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+               {
+                 if (PREDICT_FALSE (udp1->checksum))
+                   {
+                     old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port;
+                     new_port1 = udp1->src_port = s1->out2in.port;
+                     sum1 = udp1->checksum;
+                     sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address      /* changed member */
+                       );
+                     sum1 =
+                       ip_csum_update (sum1, old_port1, new_port1,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp1->checksum = ip_csum_fold (sum1);
+                   }
                }
              udp_packets++;
            }
@@ -1393,7 +1400,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          next0 = SNAT_IN2OUT_NEXT_LOOKUP;
 
          if (is_output_feature)
-           iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+           iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
 
          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
                                  iph_offset0);
@@ -1451,13 +1458,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace0;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = SNAT_IN2OUT_NEXT_REASS;
-                 fragments++;
-                 goto trace0;
-               }
-
              if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                {
                  next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
@@ -1466,7 +1466,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          key0.addr = ip0->src_address;
-         key0.port = udp0->src_port;
+         key0.port = vnet_buffer (b0)->ip.reass.l4_src_port;
          key0.protocol = proto0;
          key0.fib_index = rx_fib_index0;
 
@@ -1479,13 +1479,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  if (is_output_feature)
                    {
-                     if (PREDICT_FALSE (nat_not_translate_output_feature (sm,
-                                                                          ip0,
-                                                                          proto0,
-                                                                          udp0->src_port,
-                                                                          udp0->dst_port,
-                                                                          thread_index,
-                                                                          sw_if_index0)))
+                     if (PREDICT_FALSE
+                         (nat_not_translate_output_feature
+                          (sm, ip0, proto0,
+                           vnet_buffer (b0)->ip.reass.l4_src_port,
+                           vnet_buffer (b0)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index0)))
                        goto trace0;
 
                      /*
@@ -1495,7 +1494,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto0 == SNAT_PROTOCOL_UDP
-                          && (udp0->dst_port ==
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace0;
@@ -1543,34 +1542,41 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                                 src_address /* changed member */ );
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->src_port;
-         new_port0 = udp0->src_port = s0->out2in.port;
-
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-             sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             mss_clamping (sm, tcp0, &sum0);
-             tcp0->checksum = ip_csum_fold (sum0);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp0->checksum))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = udp0->checksum;
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
+                 mss_clamping (sm, tcp0, &sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+               {
+                 if (PREDICT_FALSE (udp0->checksum))
+                   {
+                     old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
+                     new_port0 = udp0->src_port = s0->out2in.port;
+                     sum0 = udp0->checksum;
+                     sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address      /* changed member */
+                       );
+                     sum0 =
+                       ip_csum_update (sum0, old_port0, new_port0,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp0->checksum = ip_csum_fold (sum0);
+                   }
                }
              udp_packets++;
            }
@@ -1654,7 +1660,6 @@ VLIB_REGISTER_NODE (snat_in2out_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1687,7 +1692,6 @@ VLIB_REGISTER_NODE (snat_in2out_output_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1720,7 +1724,6 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1753,307 +1756,6 @@ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
-  },
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FN (nat44_in2out_reass_node) (vlib_main_t * vm,
-                                       vlib_node_runtime_t * node,
-                                       vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, *to_next;
-  snat_in2out_next_t next_index;
-  u32 pkts_processed = 0, cached_fragments = 0;
-  snat_main_t *sm = &snat_main;
-  f64 now = vlib_time_now (vm);
-  u32 thread_index = vm->thread_index;
-  snat_main_per_thread_data_t *per_thread_data =
-    &sm->per_thread_data[thread_index];
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         u8 cached0 = 0;
-         ip4_header_t *ip0;
-         nat_reass_ip4_t *reass0;
-         udp_header_t *udp0;
-         tcp_header_t *tcp0;
-         icmp46_header_t *icmp0;
-         snat_session_key_t key0;
-         clib_bihash_kv_8_8_t kv0, value0;
-         snat_session_t *s0 = 0;
-         u16 old_port0, new_port0;
-         ip_csum_t sum0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = SNAT_IN2OUT_NEXT_LOOKUP;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         rx_fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                sw_if_index0);
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-           {
-             next0 = SNAT_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
-         udp0 = ip4_next_header (ip0);
-         tcp0 = (tcp_header_t *) udp0;
-         icmp0 = (icmp46_header_t *) udp0;
-         proto0 = ip_proto_to_snat_proto (ip0->protocol);
-
-         reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                ip0->dst_address,
-                                                ip0->fragment_id,
-                                                ip0->protocol,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = SNAT_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
-             nat_elog_notice ("maximum reassemblies exceeded");
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
-               {
-                 next0 = icmp_in2out_slow_path
-                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                    next0, now, thread_index, &s0);
-
-                 if (PREDICT_TRUE (next0 != SNAT_IN2OUT_NEXT_DROP))
-                   {
-                     if (s0)
-                       reass0->sess_index = s0 - per_thread_data->sessions;
-                     else
-                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-
-                 goto trace0;
-               }
-
-             key0.addr = ip0->src_address;
-             key0.port = udp0->src_port;
-             key0.protocol = proto0;
-             key0.fib_index = rx_fib_index0;
-             kv0.key = key0.as_u64;
-
-             if (clib_bihash_search_8_8
-                 (&per_thread_data->in2out, &kv0, &value0))
-               {
-                 if (PREDICT_FALSE
-                     (snat_not_translate
-                      (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
-                       thread_index)))
-                   goto trace0;
-
-                 next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                    &s0, node, next0, thread_index, now);
-
-                 if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
-                   goto trace0;
-
-                 if (PREDICT_FALSE (!s0))
-                   goto trace0;
-
-                 reass0->sess_index = s0 - per_thread_data->sessions;
-               }
-             else
-               {
-                 s0 = pool_elt_at_index (per_thread_data->sessions,
-                                         value0.value);
-                 reass0->sess_index = value0.value;
-               }
-             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-         else
-           {
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip4_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
-                     nat_elog_notice
-                       ("maximum fragments per reassembly exceeded");
-                     next0 = SNAT_IN2OUT_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-             s0 = pool_elt_at_index (per_thread_data->sessions,
-                                     reass0->sess_index);
-           }
-
-         old_addr0 = ip0->src_address.as_u32;
-         ip0->src_address = s0->out2in.addr;
-         new_addr0 = ip0->src_address.as_u32;
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
-
-         sum0 = ip0->checksum;
-         sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                ip4_header_t,
-                                src_address /* changed member */ );
-         ip0->checksum = ip_csum_fold (sum0);
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             old_port0 = udp0->src_port;
-             new_port0 = udp0->src_port = s0->out2in.port;
-
-             if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
-               {
-                 sum0 = tcp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 tcp0->checksum = ip_csum_fold (sum0);
-               }
-             else if (PREDICT_FALSE (udp0->checksum))
-               {
-                 sum0 = udp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
-               }
-           }
-
-         /* Hairpinning */
-         nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
-                                  s0->ext_host_port, proto0, 0);
-
-         /* Accounting */
-         nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm, b0),
-                                        thread_index);
-         /* Per-user LRU list maintenance */
-         nat44_session_update_lru (sm, s0, thread_index);
-
-       trace0:
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat44_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-             cached_fragments++;
-           }
-         else
-           {
-             pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, sm->in2out_reass_node_index,
-                              SNAT_IN2OUT_ERROR_PROCESSED_FRAGMENTS,
-                              pkts_processed);
-  vlib_node_increment_counter (vm, sm->in2out_reass_node_index,
-                              SNAT_IN2OUT_ERROR_CACHED_FRAGMENTS,
-                              cached_fragments);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
-                       SNAT_IN2OUT_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
-  .name = "nat44-in2out-reass",
-  .vector_size = sizeof (u32),
-  .format_trace = format_nat44_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-
-  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
-  .error_strings = snat_in2out_error_strings,
-
-  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
-  .next_nodes = {
-    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
-    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
-    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
-    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -2264,7 +1966,6 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
     [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
     [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
   },
 };
 /* *INDENT-ON* */
index f8cd89f..0209a40 100644 (file)
@@ -27,7 +27,6 @@
 #include <vppinfra/error.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat44_inlines.h>
 #include <nat/nat_syslog.h>
@@ -189,8 +188,7 @@ slow_path_ed (snat_main_t * sm,
              u32 rx_fib_index,
              clib_bihash_kv_16_8_t * kv,
              snat_session_t ** sessionp,
-             vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now,
-             tcp_header_t * tcp)
+             vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now)
 {
   snat_session_t *s = 0;
   snat_user_t *u;
@@ -254,7 +252,8 @@ slow_path_ed (snat_main_t * sm,
 
   if (proto == SNAT_PROTOCOL_TCP)
     {
-      if (!tcp_is_init (tcp))
+      if (!tcp_flags_is_init
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN];
          return NAT_NEXT_DROP;
@@ -405,7 +404,6 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
 {
   nat_ed_ses_key_t key;
   clib_bihash_kv_16_8_t kv, value;
-  udp_header_t *udp;
   snat_session_t *s = 0;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
 
@@ -415,7 +413,7 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
   if (ip->protocol == IP_PROTOCOL_ICMP)
     {
       key.as_u64[0] = key.as_u64[1] = 0;
-      if (get_icmp_i2o_ed_key (ip, &key))
+      if (get_icmp_i2o_ed_key (b, ip, &key))
        return 0;
       key.fib_index = 0;
       kv.key[0] = key.as_u64[0];
@@ -423,9 +421,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
     }
   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
     {
-      udp = ip4_next_header (ip);
       make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0,
-                 udp->src_port, udp->dst_port);
+                 vnet_buffer (b)->ip.reass.l4_src_port,
+                 vnet_buffer (b)->ip.reass.l4_dst_port);
     }
   else
     {
@@ -440,8 +438,7 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
        {
          if (ip->protocol == IP_PROTOCOL_TCP)
            {
-             tcp_header_t *tcp = ip4_next_header (ip);
-             if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index))
+             if (nat44_set_tcp_session_state_i2o (sm, s, b, thread_index))
                return 1;
            }
          /* Accounting */
@@ -518,7 +515,6 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
                      u8 * p_proto, snat_session_key_t * p_value,
                      u8 * p_dont_translate, void *d, void *e)
 {
-  icmp46_header_t *icmp;
   u32 sw_if_index;
   u32 rx_fib_index;
   nat_ed_ses_key_t key;
@@ -529,12 +525,11 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
   int err;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
 
-  icmp = (icmp46_header_t *) ip4_next_header (ip);
   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
   key.as_u64[0] = key.as_u64[1] = 0;
-  err = get_icmp_i2o_ed_key (ip, &key);
+  err = get_icmp_i2o_ed_key (b, ip, &key);
   if (err != 0)
     {
       b->error = node->errors[err];
@@ -550,18 +545,10 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
     {
       if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
        {
-         if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (sm, ip,
-                                                                   key.proto,
-                                                                   key.
-                                                                   l_port,
-                                                                   key.
-                                                                   r_port,
-                                                                   thread_index,
-                                                                   sw_if_index,
-                                                                   vnet_buffer
-                                                                   (b)->
-                                                                   sw_if_index
-                                                                   [VLIB_TX])))
+         if (PREDICT_FALSE
+             (nat44_ed_not_translate_output_feature
+              (sm, ip, key.proto, key.l_port, key.r_port, thread_index,
+               sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX])))
            {
              dont_translate = 1;
              goto out;
@@ -579,7 +566,9 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
            }
        }
 
-      if (PREDICT_FALSE (icmp_is_error_message (icmp)))
+      if (PREDICT_FALSE
+         (icmp_type_is_error_message
+          (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
          next = NAT_NEXT_DROP;
@@ -587,7 +576,7 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
        }
 
       next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next,
-                          thread_index, vlib_time_now (sm->vlib_main), 0);
+                          thread_index, vlib_time_now (sm->vlib_main));
 
       if (PREDICT_FALSE (next == NAT_NEXT_DROP))
        goto out;
@@ -600,9 +589,13 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
     }
   else
     {
-      if (PREDICT_FALSE (icmp->type != ICMP4_echo_request &&
-                        icmp->type != ICMP4_echo_reply &&
-                        !icmp_is_error_message (icmp)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_request
+          && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && !icmp_type_is_error_message (vnet_buffer (b)->ip.
+                                          reass.icmp_type_or_tcp_flags)))
        {
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE];
          next = NAT_NEXT_DROP;
@@ -837,14 +830,11 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
   u32 thread_index = vm->thread_index;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
-    0, fragments = 0, def_slow, def_reass;
+    0, def_slow;
 
   def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH :
     NAT_NEXT_IN2OUT_ED_SLOW_PATH;
 
-  def_reass = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_REASS :
-    NAT_NEXT_IN2OUT_ED_REASS;
-
   stats_node_index = is_slow_path ? sm->ed_in2out_slowpath_node_index :
     sm->ed_in2out_node_index;
 
@@ -910,8 +900,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                  vnet_feature_next (&nat_buffer_opaque (b1)->arc_next, b1);
                }
 
-             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
-             iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
+             iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
+             iph_offset1 = vnet_buffer (b1)->ip.reass.save_rewrite_length;
            }
 
          next0 = nat_buffer_opaque (b0)->arc_next;
@@ -971,13 +961,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace00;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = def_reass;
-                 fragments++;
-                 goto trace00;
-               }
-
              if (is_output_feature)
                {
                  if (PREDICT_FALSE
@@ -994,8 +977,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
-                     ip0->protocol, rx_fib_index0, udp0->src_port,
-                     udp0->dst_port);
+                     ip0->protocol, rx_fib_index0,
+                     vnet_buffer (b0)->ip.reass.l4_src_port,
+                     vnet_buffer (b0)->ip.reass.l4_dst_port);
 
          if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
            {
@@ -1005,8 +989,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                    {
                      if (PREDICT_FALSE
                          (nat44_ed_not_translate_output_feature
-                          (sm, ip0, ip0->protocol, udp0->src_port,
-                           udp0->dst_port, thread_index, sw_if_index0,
+                          (sm, ip0, ip0->protocol,
+                           vnet_buffer (b0)->ip.reass.l4_src_port,
+                           vnet_buffer (b0)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index0,
                            vnet_buffer (b0)->sw_if_index[VLIB_TX])))
                        goto trace00;
 
@@ -1017,7 +1003,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto0 == SNAT_PROTOCOL_UDP
-                          && (udp0->dst_port ==
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace00;
@@ -1034,7 +1020,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
 
                  next0 =
                    slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
-                                 next0, thread_index, now, tcp0);
+                                 next0, thread_index, now);
 
                  if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
                    goto trace00;
@@ -1069,36 +1055,44 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                                   dst_address);
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->src_port;
-         new_port0 = udp0->src_port = s0->out2in.port;
+         old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
 
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                    dst_address);
-             sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                    length);
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
-                                        s0->ext_host_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                        s0->ext_host_port, ip4_header_t,
-                                        length);
-                 tcp0->dst_port = s0->ext_host_port;
-                 ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 sum0 = tcp0->checksum;
+                 sum0 =
+                   ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                   dst_address);
+                 sum0 =
+                   ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
+                                   length);
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
+                                            s0->ext_host_addr.as_u32,
+                                            ip4_header_t, dst_address);
+                     sum0 =
+                       ip_csum_update (sum0,
+                                       vnet_buffer (b0)->ip.
+                                       reass.l4_dst_port, s0->ext_host_port,
+                                       ip4_header_t, length);
+                     tcp0->dst_port = s0->ext_host_port;
+                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                   }
+                 mss_clamping (sm, tcp0, &sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
                }
-             mss_clamping (sm, tcp0, &sum0);
-             tcp0->checksum = ip_csum_fold (sum0);
              tcp_packets++;
-             if (nat44_set_tcp_session_state_i2o
-                 (sm, s0, tcp0, thread_index))
+             if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index))
                goto trace00;
            }
-         else if (udp0->checksum)
+         else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
+                  && udp0->checksum)
            {
+             new_port0 = udp0->src_port = s0->out2in.port;
              sum0 = udp0->checksum;
              sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
                                     dst_address);
@@ -1109,9 +1103,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                  sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
                                         s0->ext_host_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                        s0->ext_host_port, ip4_header_t,
-                                        length);
+                 sum0 =
+                   ip_csum_update (sum0,
+                                   vnet_buffer (b0)->ip.reass.l4_dst_port,
+                                   s0->ext_host_port, ip4_header_t, length);
                  udp0->dst_port = s0->ext_host_port;
                  ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
                }
@@ -1120,12 +1115,16 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 udp0->dst_port = s0->ext_host_port;
-                 ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     udp0->dst_port = s0->ext_host_port;
+                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                   }
+                 udp_packets++;
                }
-             udp_packets++;
            }
 
          /* Accounting */
@@ -1181,8 +1180,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1,
                                                      rx_fib_index1,
-                                                     thread_index, now, vm,
-                                                     node);
+                                                     thread_index, now,
+                                                     vm, node);
                  if (!s1)
                    next1 = NAT_NEXT_DROP;
                  other_packets++;
@@ -1192,8 +1191,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
              if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
                {
                  next1 = icmp_in2out_ed_slow_path
-                   (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
-                    next1, now, thread_index, &s1);
+                   (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1,
+                    node, next1, now, thread_index, &s1);
                  icmp_packets++;
                  goto trace01;
                }
@@ -1206,13 +1205,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace01;
                }
 
-             if (ip4_is_fragment (ip1))
-               {
-                 next1 = def_reass;
-                 fragments++;
-                 goto trace01;
-               }
-
              if (is_output_feature)
                {
                  if (PREDICT_FALSE
@@ -1229,8 +1221,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address,
-                     ip1->protocol, rx_fib_index1, udp1->src_port,
-                     udp1->dst_port);
+                     ip1->protocol, rx_fib_index1,
+                     vnet_buffer (b1)->ip.reass.l4_src_port,
+                     vnet_buffer (b1)->ip.reass.l4_dst_port);
 
          if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1))
            {
@@ -1240,8 +1233,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                    {
                      if (PREDICT_FALSE
                          (nat44_ed_not_translate_output_feature
-                          (sm, ip1, ip1->protocol, udp1->src_port,
-                           udp1->dst_port, thread_index, sw_if_index1,
+                          (sm, ip1, ip1->protocol,
+                           vnet_buffer (b1)->ip.reass.l4_src_port,
+                           vnet_buffer (b1)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index1,
                            vnet_buffer (b1)->sw_if_index[VLIB_TX])))
                        goto trace01;
 
@@ -1252,7 +1247,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto1 == SNAT_PROTOCOL_UDP
-                          && (udp1->dst_port ==
+                          && (vnet_buffer (b1)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace01;
@@ -1261,7 +1256,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                    {
                      if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
                                                                 sw_if_index1,
-                                                                ip1, proto1,
+                                                                ip1,
+                                                                proto1,
                                                                 rx_fib_index1,
                                                                 thread_index)))
                        goto trace01;
@@ -1269,7 +1265,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
 
                  next1 =
                    slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node,
-                                 next1, thread_index, now, tcp1);
+                                 next1, thread_index, now);
 
                  if (PREDICT_FALSE (next1 == NAT_NEXT_DROP))
                    goto trace01;
@@ -1304,50 +1300,62 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                                   dst_address);
          ip1->checksum = ip_csum_fold (sum1);
 
-         old_port1 = udp1->src_port;
-         new_port1 = udp1->src_port = s1->out2in.port;
+         old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port;
 
          if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
            {
-             sum1 = tcp1->checksum;
-             sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
-                                    dst_address);
-             sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
-                                    length);
-             if (PREDICT_FALSE (is_twice_nat_session (s1)))
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
                {
-                 sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
-                                        s1->ext_host_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum1 = ip_csum_update (sum1, tcp1->dst_port,
-                                        s1->ext_host_port, ip4_header_t,
-                                        length);
-                 tcp1->dst_port = s1->ext_host_port;
-                 ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
+                 new_port1 = udp1->src_port = s1->out2in.port;
+                 sum1 = tcp1->checksum;
+                 sum1 =
+                   ip_csum_update (sum1, old_addr1, new_addr1,
+                                   ip4_header_t, dst_address);
+                 sum1 =
+                   ip_csum_update (sum1, old_port1, new_port1,
+                                   ip4_header_t, length);
+                 if (PREDICT_FALSE (is_twice_nat_session (s1)))
+                   {
+                     sum1 =
+                       ip_csum_update (sum1, ip1->dst_address.as_u32,
+                                       s1->ext_host_addr.as_u32,
+                                       ip4_header_t, dst_address);
+                     sum1 =
+                       ip_csum_update (sum1,
+                                       vnet_buffer (b1)->ip.
+                                       reass.l4_dst_port, s1->ext_host_port,
+                                       ip4_header_t, length);
+                     tcp1->dst_port = s1->ext_host_port;
+                     ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
+                   }
+                 mss_clamping (sm, tcp1, &sum1);
+                 tcp1->checksum = ip_csum_fold (sum1);
                }
-             tcp1->checksum = ip_csum_fold (sum1);
-             mss_clamping (sm, tcp1, &sum1);
              tcp_packets++;
-             if (nat44_set_tcp_session_state_i2o
-                 (sm, s1, tcp1, thread_index))
+             if (nat44_set_tcp_session_state_i2o (sm, s1, b1, thread_index))
                goto trace01;
            }
-         else if (udp1->checksum)
+         else if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment
+                  && udp1->checksum)
            {
+             new_port1 = udp1->src_port = s1->out2in.port;
              sum1 = udp1->checksum;
-             sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
-                                    dst_address);
-             sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
-                                    length);
+             sum1 =
+               ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
+                               dst_address);
+             sum1 =
+               ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
+                               length);
 
              if (PREDICT_FALSE (is_twice_nat_session (s1)))
                {
                  sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32,
                                         s1->ext_host_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum1 = ip_csum_update (sum1, tcp1->dst_port,
-                                        s1->ext_host_port, ip4_header_t,
-                                        length);
+                 sum1 =
+                   ip_csum_update (sum1,
+                                   vnet_buffer (b1)->ip.reass.l4_dst_port,
+                                   s1->ext_host_port, ip4_header_t, length);
                  udp1->dst_port = s1->ext_host_port;
                  ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
                }
@@ -1356,17 +1364,22 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s1)))
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
                {
-                 udp1->dst_port = s1->ext_host_port;
-                 ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
+                 new_port1 = udp1->src_port = s1->out2in.port;
+                 if (PREDICT_FALSE (is_twice_nat_session (s1)))
+                   {
+                     udp1->dst_port = s1->ext_host_port;
+                     ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32;
+                   }
                }
              udp_packets++;
            }
 
          /* Accounting */
          nat44_session_update_counters (s1, now,
-                                        vlib_buffer_length_in_chain (vm, b1),
+                                        vlib_buffer_length_in_chain (vm,
+                                                                     b1),
                                         thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s1, thread_index);
@@ -1426,7 +1439,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
              if (PREDICT_TRUE (!is_slow_path))
                vnet_feature_next (&nat_buffer_opaque (b0)->arc_next, b0);
 
-             iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+             iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length;
            }
 
          next0 = nat_buffer_opaque (b0)->arc_next;
@@ -1460,8 +1473,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0,
                                                      rx_fib_index0,
-                                                     thread_index, now, vm,
-                                                     node);
+                                                     thread_index, now,
+                                                     vm, node);
                  if (!s0)
                    next0 = NAT_NEXT_DROP;
                  other_packets++;
@@ -1471,8 +1484,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
              if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                {
                  next0 = icmp_in2out_ed_slow_path
-                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                    next0, now, thread_index, &s0);
+                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
+                    node, next0, now, thread_index, &s0);
                  icmp_packets++;
                  goto trace0;
                }
@@ -1485,13 +1498,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                  goto trace0;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = def_reass;
-                 fragments++;
-                 goto trace0;
-               }
-
              if (is_output_feature)
                {
                  if (PREDICT_FALSE
@@ -1508,8 +1514,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
-                     ip0->protocol, rx_fib_index0, udp0->src_port,
-                     udp0->dst_port);
+                     ip0->protocol, rx_fib_index0,
+                     vnet_buffer (b0)->ip.reass.l4_src_port,
+                     vnet_buffer (b0)->ip.reass.l4_dst_port);
 
          if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
            {
@@ -1519,8 +1526,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                    {
                      if (PREDICT_FALSE
                          (nat44_ed_not_translate_output_feature
-                          (sm, ip0, ip0->protocol, udp0->src_port,
-                           udp0->dst_port, thread_index, sw_if_index0,
+                          (sm, ip0, ip0->protocol,
+                           vnet_buffer (b0)->ip.reass.l4_src_port,
+                           vnet_buffer (b0)->ip.reass.l4_dst_port,
+                           thread_index, sw_if_index0,
                            vnet_buffer (b0)->sw_if_index[VLIB_TX])))
                        goto trace0;
 
@@ -1531,7 +1540,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                      if (PREDICT_FALSE
                          ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
                           && proto0 == SNAT_PROTOCOL_UDP
-                          && (udp0->dst_port ==
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
                               clib_host_to_net_u16
                               (UDP_DST_PORT_dhcp_to_server))))
                        goto trace0;
@@ -1540,7 +1549,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                    {
                      if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
                                                                 sw_if_index0,
-                                                                ip0, proto0,
+                                                                ip0,
+                                                                proto0,
                                                                 rx_fib_index0,
                                                                 thread_index)))
                        goto trace0;
@@ -1548,7 +1558,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
 
                  next0 =
                    slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node,
-                                 next0, thread_index, now, tcp0);
+                                 next0, thread_index, now);
 
                  if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
                    goto trace0;
@@ -1583,49 +1593,61 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
                                   dst_address);
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->src_port;
-         new_port0 = udp0->src_port = s0->out2in.port;
+         old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port;
 
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                    dst_address);
-             sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                    length);
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
-                                        s0->ext_host_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                        s0->ext_host_port, ip4_header_t,
-                                        length);
-                 tcp0->dst_port = s0->ext_host_port;
-                 ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 sum0 = tcp0->checksum;
+                 sum0 =
+                   ip_csum_update (sum0, old_addr0, new_addr0,
+                                   ip4_header_t, dst_address);
+                 sum0 =
+                   ip_csum_update (sum0, old_port0, new_port0,
+                                   ip4_header_t, length);
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     sum0 =
+                       ip_csum_update (sum0, ip0->dst_address.as_u32,
+                                       s0->ext_host_addr.as_u32,
+                                       ip4_header_t, dst_address);
+                     sum0 =
+                       ip_csum_update (sum0,
+                                       vnet_buffer (b0)->ip.
+                                       reass.l4_dst_port, s0->ext_host_port,
+                                       ip4_header_t, length);
+                     tcp0->dst_port = s0->ext_host_port;
+                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                   }
+                 mss_clamping (sm, tcp0, &sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
                }
-             mss_clamping (sm, tcp0, &sum0);
-             tcp0->checksum = ip_csum_fold (sum0);
              tcp_packets++;
-             if (nat44_set_tcp_session_state_i2o
-                 (sm, s0, tcp0, thread_index))
+             if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index))
                goto trace0;
            }
-         else if (udp0->checksum)
+         else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
+                  && udp0->checksum)
            {
+             new_port0 = udp0->src_port = s0->out2in.port;
              sum0 = udp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                    dst_address);
-             sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                    length);
+             sum0 =
+               ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                               dst_address);
+             sum0 =
+               ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
+                               length);
              if (PREDICT_FALSE (is_twice_nat_session (s0)))
                {
                  sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
                                         s0->ext_host_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                        s0->ext_host_port, ip4_header_t,
-                                        length);
+                 sum0 =
+                   ip_csum_update (sum0,
+                                   vnet_buffer (b0)->ip.reass.l4_dst_port,
+                                   s0->ext_host_port, ip4_header_t, length);
                  udp0->dst_port = s0->ext_host_port;
                  ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
                }
@@ -1634,18 +1656,22 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 udp0->dst_port = s0->ext_host_port;
-                 ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                 new_port0 = udp0->src_port = s0->out2in.port;
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     udp0->dst_port = s0->ext_host_port;
+                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
+                   }
+                 udp_packets++;
                }
-             udp_packets++;
            }
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm, b0),
-                                        thread_index);
+                                        vlib_buffer_length_in_chain
+                                        (vm, b0), thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -1687,379 +1713,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
   vlib_node_increment_counter (vm, stats_node_index,
                               NAT_IN2OUT_ED_ERROR_OTHER_PACKETS,
                               other_packets);
-  vlib_node_increment_counter (vm, stats_node_index,
-                              NAT_IN2OUT_ED_ERROR_FRAGMENTS, fragments);
-
-  return frame->n_vectors;
-}
-
-static inline uword
-nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm,
-                                     vlib_node_runtime_t * node,
-                                     vlib_frame_t * frame,
-                                     int is_output_feature)
-{
-  u32 n_left_from, *from, *to_next;
-  nat_next_t next_index;
-  u32 pkts_processed = 0, cached_fragments = 0;
-  snat_main_t *sm = &snat_main;
-  f64 now = vlib_time_now (vm);
-  u32 thread_index = vm->thread_index;
-  snat_main_per_thread_data_t *per_thread_data =
-    &sm->per_thread_data[thread_index];
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
-         u32 iph_offset0 = 0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         u8 cached0 = 0;
-         ip4_header_t *ip0 = 0;
-         nat_reass_ip4_t *reass0;
-         udp_header_t *udp0;
-         tcp_header_t *tcp0;
-         icmp46_header_t *icmp0;
-         clib_bihash_kv_16_8_t kv0, value0;
-         snat_session_t *s0 = 0;
-         u16 old_port0, new_port0;
-         ip_csum_t sum0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-
-         next0 = nat_buffer_opaque (b0)->arc_next;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         rx_fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                sw_if_index0);
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-           {
-             next0 = NAT_NEXT_DROP;
-             b0->error = node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         if (is_output_feature)
-           iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
-
-         ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
-                                 iph_offset0);
-
-         udp0 = ip4_next_header (ip0);
-         tcp0 = (tcp_header_t *) udp0;
-         icmp0 = (icmp46_header_t *) udp0;
-         proto0 = ip_proto_to_snat_proto (ip0->protocol);
-
-         reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                ip0->dst_address,
-                                                ip0->fragment_id,
-                                                ip0->protocol,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = NAT_NEXT_DROP;
-             b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_REASS];
-             nat_elog_notice ("maximum reassemblies exceeded");
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
-               {
-                 if (is_output_feature)
-                   {
-                     if (PREDICT_FALSE
-                         (nat_not_translate_output_feature_fwd
-                          (sm, ip0, thread_index, now, vm, b0)))
-                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     goto trace0;
-                   }
-
-                 next0 = icmp_in2out_ed_slow_path
-                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                    next0, now, thread_index, &s0);
-
-                 if (PREDICT_TRUE (next0 != NAT_NEXT_DROP))
-                   {
-                     if (s0)
-                       reass0->sess_index = s0 - per_thread_data->sessions;
-                     else
-                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-
-                 goto trace0;
-               }
-
-             make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address,
-                         ip0->protocol, rx_fib_index0, udp0->src_port,
-                         udp0->dst_port);
-
-             if (clib_bihash_search_16_8
-                 (&per_thread_data->in2out_ed, &kv0, &value0))
-               {
-                 if (is_output_feature)
-                   {
-                     if (PREDICT_FALSE
-                         (nat44_ed_not_translate_output_feature
-                          (sm, ip0, ip0->protocol, udp0->src_port,
-                           udp0->dst_port, thread_index, sw_if_index0,
-                           vnet_buffer (b0)->sw_if_index[VLIB_TX])))
-                       {
-                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                         goto trace0;
-                       }
-
-                     /*
-                      * Send DHCP packets to the ipv4 stack, or we won't
-                      * be able to use dhcp client on the outside interface
-                      */
-                     if (PREDICT_FALSE
-                         ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
-                          && proto0 == SNAT_PROTOCOL_UDP
-                          && (udp0->dst_port ==
-                              clib_host_to_net_u16
-                              (UDP_DST_PORT_dhcp_to_server))))
-                       goto trace0;
-                   }
-                 else
-                   {
-                     if (PREDICT_FALSE (nat44_ed_not_translate (sm, node,
-                                                                sw_if_index0,
-                                                                ip0, proto0,
-                                                                rx_fib_index0,
-                                                                thread_index)))
-                       {
-                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                         goto trace0;
-                       }
-                   }
-
-                 next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0,
-                                       &s0, node, next0, thread_index, now,
-                                       tcp0);
-
-                 if (PREDICT_FALSE (next0 == NAT_NEXT_DROP))
-                   goto trace0;
-
-                 if (PREDICT_FALSE (!s0))
-                   {
-                     reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     goto trace0;
-                   }
-
-                 reass0->sess_index = s0 - per_thread_data->sessions;
-               }
-             else
-               {
-                 s0 = pool_elt_at_index (per_thread_data->sessions,
-                                         value0.value);
-                 reass0->sess_index = value0.value;
-               }
-             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-         else
-           {
-             if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
-               goto trace0;
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip4_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_FRAG];
-                     nat_elog_notice
-                       ("maximum fragments per reassembly exceeded");
-                     next0 = NAT_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-             s0 = pool_elt_at_index (per_thread_data->sessions,
-                                     reass0->sess_index);
-           }
-
-         old_addr0 = ip0->src_address.as_u32;
-         ip0->src_address = s0->out2in.addr;
-         new_addr0 = ip0->src_address.as_u32;
-         if (!is_output_feature)
-           vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
-
-         sum0 = ip0->checksum;
-         sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                ip4_header_t,
-                                src_address /* changed member */ );
-         if (PREDICT_FALSE (is_twice_nat_session (s0)))
-           sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
-                                  s0->ext_host_addr.as_u32, ip4_header_t,
-                                  dst_address);
-         ip0->checksum = ip_csum_fold (sum0);
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             old_port0 = udp0->src_port;
-             new_port0 = udp0->src_port = s0->out2in.port;
-
-             if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
-               {
-                 sum0 = tcp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
-                   {
-                     sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
-                                            s0->ext_host_addr.as_u32,
-                                            ip4_header_t, dst_address);
-                     sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                            s0->ext_host_port, ip4_header_t,
-                                            length);
-                     tcp0->dst_port = s0->ext_host_port;
-                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
-                   }
-                 tcp0->checksum = ip_csum_fold (sum0);
-               }
-             else if (udp0->checksum)
-               {
-                 sum0 = udp0->checksum;
-                 sum0 =
-                   ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                   dst_address);
-                 sum0 =
-                   ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                   length);
-                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
-                   {
-                     sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32,
-                                            s0->ext_host_addr.as_u32,
-                                            ip4_header_t, dst_address);
-                     sum0 = ip_csum_update (sum0, tcp0->dst_port,
-                                            s0->ext_host_port, ip4_header_t,
-                                            length);
-                     udp0->dst_port = s0->ext_host_port;
-                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
-                   }
-                 udp0->checksum = ip_csum_fold (sum0);
-               }
-             else
-               {
-                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
-                   {
-                     udp0->dst_port = s0->ext_host_port;
-                     ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32;
-                   }
-               }
-           }
-
-         /* Hairpinning */
-         nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
-                                  s0->ext_host_port, proto0, 1);
-
-         /* Accounting */
-         nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm, b0),
-                                        thread_index);
-         /* Per-user LRU list maintenance */
-         nat44_session_update_lru (sm, s0, thread_index);
-
-       trace0:
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat44_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-             cached_fragments++;
-           }
-         else
-           {
-             pkts_processed += next0 == nat_buffer_opaque (b0)->arc_next;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
-                              NAT_IN2OUT_ED_ERROR_PROCESSED_FRAGMENTS,
-                              pkts_processed);
-  vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index,
-                              NAT_IN2OUT_ED_ERROR_CACHED_FRAGMENTS,
-                              cached_fragments);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT],
-                       NAT_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
   return frame->n_vectors;
 }
 
@@ -2104,8 +1757,8 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
 /* *INDENT-ON* */
 
 VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm,
-                                             vlib_node_runtime_t * node,
-                                             vlib_frame_t * frame)
+                                             vlib_node_runtime_t *
+                                             node, vlib_frame_t * frame)
 {
   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0);
 }
@@ -2124,8 +1777,8 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = {
 /* *INDENT-ON* */
 
 VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm,
-                                                    vlib_node_runtime_t *
-                                                    node,
+                                                    vlib_node_runtime_t
+                                                    node,
                                                     vlib_frame_t * frame)
 {
   return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1);
@@ -2144,45 +1797,6 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {
 };
 /* *INDENT-ON* */
 
-
-VLIB_NODE_FN (nat44_ed_in2out_reass_node) (vlib_main_t * vm,
-                                          vlib_node_runtime_t * node,
-                                          vlib_frame_t * frame)
-{
-  return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_ed_in2out_reass_node) = {
-  .name = "nat44-ed-in2out-reass",
-  .vector_size = sizeof (u32),
-  .sibling_of = "nat-default",
-  .format_trace = format_nat44_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
-  .error_strings = nat_in2out_ed_error_strings,
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FN (nat44_ed_in2out_reass_output_node) (vlib_main_t * vm,
-                                                 vlib_node_runtime_t * node,
-                                                 vlib_frame_t * frame)
-{
-  return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_ed_in2out_reass_output_node) = {
-  .name = "nat44-ed-in2out-reass-output",
-  .vector_size = sizeof (u32),
-  .sibling_of = "nat-default",
-  .format_trace = format_nat44_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings),
-  .error_strings = nat_in2out_ed_error_strings,
-};
-/* *INDENT-ON* */
-
 static u8 *
 format_nat_pre_trace (u8 * s, va_list * args)
 {
@@ -2192,9 +1806,8 @@ format_nat_pre_trace (u8 * s, va_list * args)
   return format (s, "in2out next_index %d", t->next_index);
 }
 
-VLIB_NODE_FN (nat_pre_in2out_node) (vlib_main_t * vm,
-                                   vlib_node_runtime_t * node,
-                                   vlib_frame_t * frame)
+VLIB_NODE_FN (nat_pre_in2out_node)
+  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
   return nat_pre_node_fn_inline (vm, node, frame,
                                 NAT_NEXT_IN2OUT_ED_FAST_PATH);
index 8cf26d4..356dd0c 100644 (file)
@@ -189,85 +189,6 @@ autoreply define nat_ipfix_enable_disable {
   bool enable;
 };
 
-/** \brief Set NAT virtual fragmentation reassembly
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-    @param timeout - reassembly timeout
-    @param max_reass - maximum number of concurrent reassemblies
-    @param max_frag - maximum number of fragmets per reassembly
-    @param drop_frag - if 0 translate fragments, otherwise drop fragments
-    @param is_ip6 - true if IPv6, false if IPv4
-*/
-autoreply define nat_set_reass {
-  u32 client_index;
-  u32 context;
-  u32 timeout;
-  u16 max_reass;
-  u8  max_frag;
-  u8  drop_frag;
-  bool  is_ip6;
-};
-
-/** \brief Get NAT virtual fragmentation reassembly configuration
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-*/
-define nat_get_reass {
-  u32 client_index;
-  u32 context;
-};
-
-/** \brief Get NAT virtual fragmentation reassembly configuration reply
-    @param context - sender context, to match reply w/ request
-    @param retval - return code
-    @param ip4_timeout - reassembly timeout
-    @param ip4_max_reass - maximum number of concurrent reassemblies
-    @param ip4_max_frag - maximum number of fragmets per reassembly
-    @param ip4_drop_frag - if 0 translate fragments, otherwise drop fragments
-    @param ip6_timeout - reassembly timeout
-    @param ip6_max_reass - maximum number of concurrent reassemblies
-    @param ip6_max_frag - maximum number of fragmets per reassembly
-    @param ip6_drop_frag - if 0 translate fragments, otherwise drop fragments
-*/
-define nat_get_reass_reply {
-  u32 context;
-  i32 retval;
-  u32 ip4_timeout;
-  u16 ip4_max_reass;
-  u8  ip4_max_frag;
-  u8  ip4_drop_frag;
-  u32 ip6_timeout;
-  u16 ip6_max_reass;
-  u8  ip6_max_frag;
-  u8  ip6_drop_frag;
-};
-
-/** \brief Dump NAT virtual fragmentation reassemblies
-    @param client_index - opaque cookie to identify the sender
-    @param context - sender context, to match reply w/ request
-*/
-define nat_reass_dump {
-  u32 client_index;
-  u32 context;
-};
-
-/** \brief NAT virtual fragmentation reassemblies response
-    @param context - sender context, to match reply w/ request
-    @param src_addr - source IPv4 address
-    @param dst_addr - destination IPv4 address
-    @param frag_id - fragment ID
-    @param proto - protocol
-    @param frag_n - number of cached fragments
-*/
-define nat_reass_details {
-  u32 context;
-  vl_api_address_t src_addr;
-  vl_api_address_t dst_addr;
-  u32 frag_id;
-  u8 proto;
-  u8 frag_n;
-};
-
 /** \brief Set values of timeouts for NAT sessions (seconds)
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
index d85fb26..c1a1839 100755 (executable)
 #include <nat/nat64.h>
 #include <nat/nat66.h>
 #include <nat/dslite.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_affinity.h>
 #include <nat/nat_syslog.h>
 #include <nat/nat_ha.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
 
 #include <vpp/app/version.h>
 
@@ -46,13 +46,15 @@ fib_source_t nat_fib_src_low;
 VNET_FEATURE_INIT (nat_pre_in2out, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat-pre-in2out",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+                              "ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (nat_pre_out2in, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat-pre-out2in",
   .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
-                               "ip4-dhcp-client-detect"),
+                               "ip4-dhcp-client-detect",
+                              "ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
   .arc_name = "ip4-unicast",
@@ -68,103 +70,103 @@ VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
 VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-in2out",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-out2in",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                                "ip4-dhcp-client-detect"),
 };
 VNET_FEATURE_INIT (ip4_nat_classify, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-classify",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-det-in2out",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-det-out2in",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                                "ip4-dhcp-client-detect"),
 };
 VNET_FEATURE_INIT (ip4_nat_det_classify, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-det-classify",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-ed-in2out",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-ed-out2in",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                                "ip4-dhcp-client-detect"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-ed-classify",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-handoff-classify",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-in2out-fast",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-out2in-fast",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
                                "ip4-dhcp-client-detect"),
 };
 VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-hairpin-dst",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-ed-hairpin-dst",
-  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
 };
 
 /* Hook up output features */
 VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
   .arc_name = "ip4-output",
   .node_name = "nat44-in2out-output",
-  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
   .arc_name = "ip4-output",
   .node_name = "nat44-in2out-output-worker-handoff",
-  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
 };
 VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
   .arc_name = "ip4-output",
   .node_name = "nat44-hairpin-src",
-  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
   .arc_name = "ip4-output",
   .node_name = "nat44-ed-in2out-output",
-  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
 };
 VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = {
   .arc_name = "ip4-output",
   .node_name = "nat44-ed-hairpin-src",
-  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
+  .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"),
 };
 
 /* Hook up ip4-local features */
@@ -1869,6 +1871,9 @@ feature_set:
                     feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
                   }
 
+               int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+               if (rv)
+                 return rv;
                 vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
                                              sw_if_index, 0, 0, 0);
                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
@@ -1887,6 +1892,9 @@ feature_set:
               }
             else
               {
+               int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+               if (rv)
+                 return rv;
                 vnet_feature_enable_disable ("ip4-unicast", feature_name,
                                              sw_if_index, 0, 0, 0);
                 pool_put (sm->interfaces, i);
@@ -1934,6 +1942,9 @@ feature_set:
                 feature_name = "nat44-classify";
               }
 
+           int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+           if (rv)
+             return rv;
             vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
                                          sw_if_index, 0, 0, 0);
             vnet_feature_enable_disable ("ip4-unicast", feature_name,
@@ -1964,6 +1975,10 @@ feature_set:
   vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0,
                               0);
 
+  int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+  if (rv)
+    return rv;
+
   if (is_inside && !sm->out2in_dpo)
     {
       if (sm->endpoint_dependent)
@@ -2063,6 +2078,15 @@ feature_set:
     {
       if (sm->endpoint_dependent)
        {
+         int rv =
+           ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+         if (rv)
+           return rv;
+         rv =
+           ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
+                                                           !is_del);
+         if (rv)
+           return rv;
          vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst",
                                       sw_if_index, !is_del, 0, 0);
          vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src",
@@ -2070,6 +2094,15 @@ feature_set:
        }
       else
        {
+         int rv =
+           ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+         if (rv)
+           return rv;
+         rv =
+           ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
+                                                           !is_del);
+         if (rv)
+           return rv;
          vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst",
                                       sw_if_index, !is_del, 0, 0);
          vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src",
@@ -2080,6 +2113,13 @@ feature_set:
 
   if (sm->num_workers > 1)
     {
+      int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+      if (rv)
+       return rv;
+      rv =
+       ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+      if (rv)
+       return rv;
       vnet_feature_enable_disable ("ip4-unicast",
                                   "nat44-out2in-worker-handoff",
                                   sw_if_index, !is_del, 0, 0);
@@ -2091,6 +2131,15 @@ feature_set:
     {
       if (sm->endpoint_dependent)
        {
+         int rv =
+           ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+         if (rv)
+           return rv;
+         rv =
+           ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
+                                                           !is_del);
+         if (rv)
+           return rv;
          vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in",
                                       sw_if_index, !is_del, 0, 0);
          vnet_feature_enable_disable ("ip4-output", "nat44-ed-in2out-output",
@@ -2098,6 +2147,15 @@ feature_set:
        }
       else
        {
+         int rv =
+           ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+         if (rv)
+           return rv;
+         rv =
+           ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index,
+                                                           !is_del);
+         if (rv)
+           return rv;
          vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in",
                                       sw_if_index, !is_del, 0, 0);
          vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output",
@@ -2352,29 +2410,21 @@ snat_init (vlib_main_t * vm)
   sm->in2out_slowpath_node_index = node->index;
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath");
   sm->in2out_slowpath_output_node_index = node->index;
-  node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-reass");
-  sm->in2out_reass_node_index = node->index;
 
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
   sm->ed_in2out_node_index = node->index;
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath");
   sm->ed_in2out_slowpath_node_index = node->index;
-  node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-reass");
-  sm->ed_in2out_reass_node_index = node->index;
 
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in");
   sm->out2in_node_index = node->index;
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast");
   sm->out2in_fast_node_index = node->index;
-  node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-reass");
-  sm->out2in_reass_node_index = node->index;
 
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
   sm->ed_out2in_node_index = node->index;
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath");
   sm->ed_out2in_slowpath_node_index = node->index;
-  node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-reass");
-  sm->ed_out2in_reass_node_index = node->index;
 
   node = vlib_get_node_by_name (vm, (u8 *) "nat44-det-in2out");
   sm->det_in2out_node_index = node->index;
@@ -2471,8 +2521,7 @@ snat_init (vlib_main_t * vm)
                                         FIB_SOURCE_PRIORITY_LOW,
                                         FIB_SOURCE_BH_SIMPLE);
 
-  /* Init virtual fragmenentation reassembly */
-  return nat_reass_init (vm);
+  return error;
 }
 
 VLIB_INIT_FUNCTION (snat_init);
@@ -3012,8 +3061,8 @@ snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
 }
 
 static u32
-snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0,
-                          u8 is_output)
+snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0,
+                          u32 rx_fib_index0, u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   udp_header_t *udp;
@@ -3044,52 +3093,6 @@ snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0,
   udp = ip4_next_header (ip0);
   port = udp->dst_port;
 
-  if (PREDICT_FALSE (ip4_is_fragment (ip0)))
-    {
-      if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-       return vlib_get_thread_index ();
-
-      nat_reass_ip4_t *reass;
-      reass = nat_ip4_reass_find (ip0->src_address, ip0->dst_address,
-                                 ip0->fragment_id, ip0->protocol);
-
-      if (reass && (reass->thread_index != (u32) ~ 0))
-       return reass->thread_index;
-
-      if (ip4_is_first_fragment (ip0))
-       {
-         reass =
-           nat_ip4_reass_create (ip0->src_address, ip0->dst_address,
-                                 ip0->fragment_id, ip0->protocol);
-         if (!reass)
-           goto no_reass;
-
-         if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
-           {
-             m_key.addr = ip0->dst_address;
-             m_key.port = clib_net_to_host_u16 (port);
-             m_key.protocol = proto;
-             m_key.fib_index = rx_fib_index0;
-             kv.key = m_key.as_u64;
-             if (!clib_bihash_search_8_8
-                 (&sm->static_mapping_by_external, &kv, &value))
-               {
-                 m = pool_elt_at_index (sm->static_mappings, value.value);
-                 reass->thread_index = m->workers[0];
-                 return reass->thread_index;
-               }
-           }
-         reass->thread_index = sm->first_worker_index;
-         reass->thread_index +=
-           sm->workers[(clib_net_to_host_u16 (port) - 1024) /
-                       sm->port_per_thread];
-         return reass->thread_index;
-       }
-      else
-       return vlib_get_thread_index ();
-    }
-
-no_reass:
   /* unknown protocol */
   if (PREDICT_FALSE (proto == ~0))
     {
@@ -3101,10 +3104,12 @@ no_reass:
     {
       icmp46_header_t *icmp = (icmp46_header_t *) udp;
       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
-      if (!icmp_is_error_message (icmp))
-       port = echo->identifier;
+      if (!icmp_type_is_error_message
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
+       port = vnet_buffer (b)->ip.reass.l4_src_port;
       else
        {
+         /* if error message, then it's not fragmented and we can access it */
          ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
          proto = ip_proto_to_snat_proto (inner_ip->protocol);
          void *l4_header = ip4_next_header (inner_ip);
@@ -3252,8 +3257,8 @@ nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
 }
 
 static u32
-nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index,
-                              u8 is_output)
+nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip,
+                              u32 rx_fib_index, u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   clib_bihash_kv_8_8_t kv, value;
@@ -3295,7 +3300,7 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index,
     {
       nat_ed_ses_key_t key;
 
-      if (!get_icmp_o2i_ed_key (ip, &key))
+      if (!get_icmp_o2i_ed_key (b, ip, &key))
        {
 
          key.fib_index = rx_fib_index;
@@ -3349,10 +3354,12 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index,
     {
       icmp46_header_t *icmp = (icmp46_header_t *) udp;
       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
-      if (!icmp_is_error_message (icmp))
-       port = echo->identifier;
+      if (!icmp_type_is_error_message
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
+       port = vnet_buffer (b)->ip.reass.l4_src_port;
       else
        {
+         /* if error message, then it's not fragmented and we can access it */
          ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
          proto = ip_proto_to_snat_proto (inner_ip->protocol);
          void *l4_header = ip4_next_header (inner_ip);
@@ -4359,11 +4366,8 @@ VLIB_REGISTER_NODE (nat_default_node) = {
     [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out",
     [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath",
     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
-    [NAT_NEXT_IN2OUT_ED_REASS] = "nat44-ed-in2out-reass",
-    [NAT_NEXT_IN2OUT_ED_OUTPUT_REASS] = "nat44-ed-in2out-reass-output",
     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
-    [NAT_NEXT_OUT2IN_ED_REASS] = "nat44-ed-out2in-reass",
   },
 };
 /* *INDENT-ON* */
index 38f5a99..d3fa3ee 100644 (file)
@@ -58,11 +58,8 @@ typedef enum
   NAT_NEXT_IN2OUT_ED_FAST_PATH,
   NAT_NEXT_IN2OUT_ED_SLOW_PATH,
   NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH,
-  NAT_NEXT_IN2OUT_ED_REASS,
-  NAT_NEXT_IN2OUT_ED_OUTPUT_REASS,
   NAT_NEXT_OUT2IN_ED_FAST_PATH,
   NAT_NEXT_OUT2IN_ED_SLOW_PATH,
-  NAT_NEXT_OUT2IN_ED_REASS,
   NAT_N_NEXT,
 } nat_next_t;
 
@@ -534,8 +531,14 @@ typedef u32 (snat_icmp_match_function_t) (struct snat_main_s * sm,
                                          void *e);
 
 /* Return worker thread index for given packet */
-typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip,
-                                         u32 rx_fib_index, u8 is_output);
+typedef u32 (snat_get_worker_in2out_function_t) (ip4_header_t * ip,
+                                                u32 rx_fib_index,
+                                                u8 is_output);
+
+typedef u32 (snat_get_worker_out2in_function_t) (vlib_buffer_t * b,
+                                                ip4_header_t * ip,
+                                                u32 rx_fib_index,
+                                                u8 is_output);
 
 /* NAT address and port allacotaion function */
 typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
@@ -556,8 +559,8 @@ typedef struct snat_main_s
   u32 num_workers;
   u32 first_worker_index;
   u32 *workers;
-  snat_get_worker_function_t *worker_in2out_cb;
-  snat_get_worker_function_t *worker_out2in_cb;
+  snat_get_worker_in2out_function_t *worker_in2out_cb;
+  snat_get_worker_out2in_function_t *worker_out2in_cb;
   u16 port_per_thread;
   u32 num_snat_thread;
 
@@ -629,16 +632,12 @@ typedef struct snat_main_s
   u32 in2out_fast_node_index;
   u32 in2out_slowpath_node_index;
   u32 in2out_slowpath_output_node_index;
-  u32 in2out_reass_node_index;
   u32 ed_in2out_node_index;
   u32 ed_in2out_slowpath_node_index;
-  u32 ed_in2out_reass_node_index;
   u32 out2in_node_index;
   u32 out2in_fast_node_index;
-  u32 out2in_reass_node_index;
   u32 ed_out2in_node_index;
   u32 ed_out2in_slowpath_node_index;
-  u32 ed_out2in_reass_node_index;
   u32 det_in2out_node_index;
   u32 det_out2in_node_index;
 
@@ -756,7 +755,6 @@ format_function_t format_snat_key;
 format_function_t format_static_mapping_key;
 format_function_t format_snat_protocol;
 format_function_t format_nat_addr_and_port_alloc_alg;
-format_function_t format_nat44_reass_trace;
 /* unformat functions */
 unformat_function_t unformat_snat_protocol;
 
@@ -848,7 +846,11 @@ unformat_function_t unformat_snat_protocol;
     @param f TCP flags byte
     @return 1 if client initiating TCP connection
 */
-#define tcp_is_init(t) ((t->flags & TCP_FLAG_SYN) && !(t->flags & TCP_FLAG_ACK))
+always_inline bool
+tcp_flags_is_init (u8 f)
+{
+  return (f & TCP_FLAG_SYN) && !(f & TCP_FLAG_ACK);	/* SYN set, ACK clear */
+}
 
 /* logging */
 #define nat_log_err(...) \
index b6ce4d7..f339770 100644 (file)
 #include <vnet/vnet.h>
 #include <vnet/fib/ip4_fib.h>
 #include <nat/nat.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 
 #define foreach_nat44_classify_error                      \
-_(MAX_REASS, "Maximum reassemblies exceeded")             \
-_(MAX_FRAG, "Maximum fragments per reassembly exceeded")  \
 _(NEXT_IN2OUT, "next in2out")                             \
 _(NEXT_OUT2IN, "next out2in")                             \
 _(FRAG_CACHED, "fragment cached")
@@ -87,7 +84,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm,
   nat44_classify_next_t next_index;
   snat_main_t *sm = &snat_main;
   snat_static_mapping_t *m;
-  u32 thread_index = vm->thread_index;
   u32 *fragments_to_drop = 0;
   u32 *fragments_to_loopback = 0;
   u32 next_in2out = 0, next_out2in = 0, frag_cached = 0;
@@ -111,8 +107,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm,
          snat_address_t *ap;
          snat_session_key_t m_key0;
          clib_bihash_kv_8_8_t kv0, value0;
-         udp_header_t *udp0;
-         nat_reass_ip4_t *reass0;
          u8 cached0 = 0;
 
          /* speculatively enqueue b0 to the current next frame */
@@ -125,7 +119,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm,
 
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
-         udp0 = ip4_next_header (ip0);
 
           /* *INDENT-OFF* */
           vec_foreach (ap, sm->addresses)
@@ -154,87 +147,16 @@ nat44_classify_node_fn_inline (vlib_main_t * vm,
                    next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
                  goto enqueue0;
                }
-             if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0))
-               {
-                 /* process leading fragment/whole packet (with L4 header) */
-                 m_key0.port = clib_net_to_host_u16 (udp0->dst_port);
-                 m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
-                 kv0.key = m_key0.as_u64;
-                 if (!clib_bihash_search_8_8
-                     (&sm->static_mapping_by_external, &kv0, &value0))
-                   {
-                     m =
-                       pool_elt_at_index (sm->static_mappings, value0.value);
-                     if (m->local_addr.as_u32 != m->external_addr.as_u32)
-                       next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
-                   }
-                 if (ip4_is_fragment (ip0))
-                   {
-                     reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                            ip0->dst_address,
-                                                            ip0->fragment_id,
-                                                            ip0->protocol,
-                                                            1,
-                                                            &fragments_to_drop);
-                     if (PREDICT_FALSE (!reass0))
-                       {
-                         next0 = NAT44_CLASSIFY_NEXT_DROP;
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                         nat_elog_notice ("maximum reassemblies exceeded");
-                         goto enqueue0;
-                       }
-                     /* save classification for future fragments and set past
-                      * fragments to be looped over and reprocessed */
-                     if (next0 == NAT44_CLASSIFY_NEXT_OUT2IN)
-                       reass0->classify_next =
-                         NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN;
-                     else
-                       reass0->classify_next =
-                         NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-               }
-             else
+             m_key0.port =
+               clib_net_to_host_u16 (vnet_buffer (b0)->ip.reass.l4_dst_port);
+             m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
+             kv0.key = m_key0.as_u64;
+             if (!clib_bihash_search_8_8
+                 (&sm->static_mapping_by_external, &kv0, &value0))
                {
-                 /* process non-first fragment */
-                 reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                        ip0->dst_address,
-                                                        ip0->fragment_id,
-                                                        ip0->protocol,
-                                                        1,
-                                                        &fragments_to_drop);
-                 if (PREDICT_FALSE (!reass0))
-                   {
-                     next0 = NAT44_CLASSIFY_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                     nat_elog_notice ("maximum reassemblies exceeded");
-                     goto enqueue0;
-                   }
-                 if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE)
-                   /* first fragment still hasn't arrived */
-                   {
-                     if (nat_ip4_reass_add_fragment
-                         (thread_index, reass0, bi0, &fragments_to_drop))
-                       {
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG];
-                         nat_elog_notice
-                           ("maximum fragments per reassembly exceeded");
-                         next0 = NAT44_CLASSIFY_NEXT_DROP;
-                         goto enqueue0;
-                       }
-                     cached0 = 1;
-                     goto enqueue0;
-                   }
-                 else if (reass0->classify_next ==
-                          NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN)
+                 m = pool_elt_at_index (sm->static_mappings, value0.value);
+                 if (m->local_addr.as_u32 != m->external_addr.as_u32)
                    next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
-                 else if (reass0->classify_next ==
-                          NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT)
-                   next0 = NAT44_CLASSIFY_NEXT_IN2OUT;
                }
            }
 
@@ -343,8 +265,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
          snat_session_key_t m_key0;
          clib_bihash_kv_8_8_t kv0, value0;
          clib_bihash_kv_16_8_t ed_kv0, ed_value0;
-         udp_header_t *udp0;
-         nat_reass_ip4_t *reass0;
          u8 cached0 = 0;
 
          /* speculatively enqueue b0 to the current next frame */
@@ -357,7 +277,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
 
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
-         udp0 = ip4_next_header (ip0);
 
          if (!in_loopback)
            {
@@ -369,108 +288,21 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
 
          if (ip0->protocol != IP_PROTOCOL_ICMP)
            {
-             if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0))
-               {
-                 /* process leading fragment/whole packet (with L4 header) */
-                 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-                 rx_fib_index0 =
-                   fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                        sw_if_index0);
-                 make_ed_kv (&ed_kv0, &ip0->src_address,
-                             &ip0->dst_address, ip0->protocol,
-                             rx_fib_index0, udp0->src_port, udp0->dst_port);
-                 if (ip4_is_fragment (ip0))
-                   {
-                     reass0 =
-                       nat_ip4_reass_find_or_create (ip0->src_address,
-                                                     ip0->dst_address,
-                                                     ip0->fragment_id,
-                                                     ip0->protocol, 1,
-                                                     &fragments_to_drop);
-                     if (PREDICT_FALSE (!reass0))
-                       {
-                         next0 = NAT_NEXT_DROP;
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                         nat_elog_notice ("maximum reassemblies exceeded");
-                         goto enqueue0;
-                       }
-                     if (!clib_bihash_search_16_8
-                         (&tsm->in2out_ed, &ed_kv0, &ed_value0))
-                       {
-                         /* session exists so classify as IN2OUT,
-                          * save this information for future fragments and set
-                          * past fragments to be looped over and reprocessed */
-                         reass0->sess_index = ed_value0.value;
-                         reass0->classify_next =
-                           NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                         goto enqueue0;
-                       }
-                     else
-                       {
-                         /* session doesn't exist so continue in the code,
-                          * save this information for future fragments and set
-                          * past fragments to be looped over and reprocessed */
-                         reass0->flags |=
-                           NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                       }
-                   }
-                 else
-                   {
-                     /* process whole packet */
-                     if (!clib_bihash_search_16_8
-                         (&tsm->in2out_ed, &ed_kv0, &ed_value0))
-                       goto enqueue0;
-                     /* session doesn't exist so continue in code */
-                   }
-               }
-             else
-               {
-                 /* process non-first fragment */
-                 reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                        ip0->dst_address,
-                                                        ip0->fragment_id,
-                                                        ip0->protocol,
-                                                        1,
-                                                        &fragments_to_drop);
-                 if (PREDICT_FALSE (!reass0))
-                   {
-                     next0 = NAT_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                     nat_elog_notice ("maximum reassemblies exceeded");
-                     goto enqueue0;
-                   }
-                 /* check if first fragment has arrived */
-                 if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE
-                     && !(reass0->flags &
-                          NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE))
-                   {
-                     /* first fragment still hasn't arrived, cache this fragment */
-                     if (nat_ip4_reass_add_fragment
-                         (thread_index, reass0, bi0, &fragments_to_drop))
-                       {
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG];
-                         nat_elog_notice
-                           ("maximum fragments per reassembly exceeded");
-                         next0 = NAT_NEXT_DROP;
-                         goto enqueue0;
-                       }
-                     cached0 = 1;
-                     goto enqueue0;
-                   }
-                 if (reass0->classify_next ==
-                     NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT)
-                   goto enqueue0;
-                 /* flag NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE is set
-                  * so keep the default next0 and continue in code to
-                  * potentially find other classification for this packet */
-               }
+             /* build in2out ED key; L4 ports come from ip.reass metadata */
+             sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+             rx_fib_index0 =
+               fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                    sw_if_index0);
+             make_ed_kv (&ed_kv0, &ip0->src_address,
+                         &ip0->dst_address, ip0->protocol,
+                         rx_fib_index0,
+                         vnet_buffer (b0)->ip.reass.l4_src_port,
+                         vnet_buffer (b0)->ip.reass.l4_dst_port);
+             /* session found in in2out table: skip further classification */
+             if (!clib_bihash_search_16_8
+                 (&tsm->in2out_ed, &ed_kv0, &ed_value0))
+               goto enqueue0;
+             /* session doesn't exist so continue in code */
            }
 
           /* *INDENT-OFF* */
@@ -500,85 +332,16 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
                    next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
                  goto enqueue0;
                }
-             if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0))
-               {
-                 /* process leading fragment/whole packet (with L4 header) */
-                 m_key0.port = clib_net_to_host_u16 (udp0->dst_port);
-                 m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
-                 kv0.key = m_key0.as_u64;
-                 if (!clib_bihash_search_8_8
-                     (&sm->static_mapping_by_external, &kv0, &value0))
-                   {
-                     m =
-                       pool_elt_at_index (sm->static_mappings, value0.value);
-                     if (m->local_addr.as_u32 != m->external_addr.as_u32)
-                       next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
-                   }
-                 if (ip4_is_fragment (ip0))
-                   {
-                     reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                            ip0->dst_address,
-                                                            ip0->fragment_id,
-                                                            ip0->protocol,
-                                                            1,
-                                                            &fragments_to_drop);
-                     if (PREDICT_FALSE (!reass0))
-                       {
-                         next0 = NAT_NEXT_DROP;
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                         nat_elog_notice ("maximum reassemblies exceeded");
-                         goto enqueue0;
-                       }
-                     /* save classification for future fragments and set past
-                      * fragments to be looped over and reprocessed */
-                     if (next0 == NAT_NEXT_OUT2IN_ED_FAST_PATH)
-                       reass0->classify_next = NAT_NEXT_OUT2IN_ED_REASS;
-                     else
-                       reass0->classify_next = NAT_NEXT_IN2OUT_ED_REASS;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-               }
-             else
+             m_key0.port =
+               clib_net_to_host_u16 (vnet_buffer (b0)->ip.reass.l4_dst_port);
+             m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
+             kv0.key = m_key0.as_u64;
+             if (!clib_bihash_search_8_8
+                 (&sm->static_mapping_by_external, &kv0, &value0))
                {
-                 /* process non-first fragment */
-                 reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                        ip0->dst_address,
-                                                        ip0->fragment_id,
-                                                        ip0->protocol,
-                                                        1,
-                                                        &fragments_to_drop);
-                 if (PREDICT_FALSE (!reass0))
-                   {
-                     next0 = NAT_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
-                     nat_elog_notice ("maximum reassemblies exceeded");
-                     goto enqueue0;
-                   }
-                 if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE)
-                   /* first fragment still hasn't arrived */
-                   {
-                     if (nat_ip4_reass_add_fragment
-                         (thread_index, reass0, bi0, &fragments_to_drop))
-                       {
-                         b0->error =
-                           node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG];
-                         nat_elog_notice
-                           ("maximum fragments per reassembly exceeded");
-                         next0 = NAT_NEXT_DROP;
-                         goto enqueue0;
-                       }
-                     cached0 = 1;
-                     goto enqueue0;
-                   }
-                 else if (reass0->classify_next ==
-                          NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN)
+                 m = pool_elt_at_index (sm->static_mappings, value0.value);
+                 if (m->local_addr.as_u32 != m->external_addr.as_u32)
                    next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
-                 else if (reass0->classify_next ==
-                          NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT)
-                   next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH;
                }
            }
 
index 331e7ca..69a19b8 100644 (file)
@@ -22,7 +22,6 @@
 #include <vnet/fib/ip4_fib.h>
 #include <nat/nat.h>
 #include <nat/nat_inlines.h>
-#include <nat/nat_reass.h>
 
 typedef enum
 {
@@ -211,7 +210,8 @@ snat_icmp_hairpinning (snat_main_t * sm,
   snat_session_t *s0;
   snat_static_mapping_t *m0;
 
-  if (icmp_is_error_message (icmp0))
+  if (icmp_type_is_error_message
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
     {
       ip4_header_t *inner_ip0 = 0;
       tcp_udp_header_t *l4_header = 0;
@@ -391,7 +391,7 @@ nat44_ed_hairpinning_unknown_proto (snat_main_t * sm,
   snat_main_per_thread_data_t *tsm;
 
   if (sm->num_workers > 1)
-    ti = sm->worker_out2in_cb (ip, sm->outside_fib_index, 0);
+    ti = sm->worker_out2in_cb (b, ip, sm->outside_fib_index, 0);
   else
     ti = sm->num_workers;
   tsm = &sm->per_thread_data[ti];
index 277f2de..d221e5b 100644 (file)
@@ -82,7 +82,6 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
   snat_main_t *sm = &snat_main;
 
-  snat_get_worker_function_t *get_worker;
   u32 fq_index, thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
@@ -93,12 +92,10 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
   if (is_in2out)
     {
       fq_index = is_output ? sm->fq_in2out_output_index : sm->fq_in2out_index;
-      get_worker = sm->worker_in2out_cb;
     }
   else
     {
       fq_index = sm->fq_out2in_index;
-      get_worker = sm->worker_out2in_cb;
     }
 
   while (n_left_from >= 4)
@@ -147,10 +144,20 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
       rx_fib_index2 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index2);
       rx_fib_index3 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index3);
 
-      ti[0] = get_worker (ip0, rx_fib_index0, is_output);
-      ti[1] = get_worker (ip1, rx_fib_index1, is_output);
-      ti[2] = get_worker (ip2, rx_fib_index2, is_output);
-      ti[3] = get_worker (ip3, rx_fib_index3, is_output);
+      if (is_in2out)
+       {
+         ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output);
+         ti[1] = sm->worker_in2out_cb (ip1, rx_fib_index1, is_output);
+         ti[2] = sm->worker_in2out_cb (ip2, rx_fib_index2, is_output);
+         ti[3] = sm->worker_in2out_cb (ip3, rx_fib_index3, is_output);
+       }
+      else
+       {
+         ti[0] = sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output);
+         ti[1] = sm->worker_out2in_cb (b[1], ip1, rx_fib_index1, is_output);
+         ti[2] = sm->worker_out2in_cb (b[2], ip2, rx_fib_index2, is_output);
+         ti[3] = sm->worker_out2in_cb (b[3], ip3, rx_fib_index3, is_output);
+       }
 
       if (ti[0] == thread_index)
        same_worker++;
@@ -194,7 +201,14 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
       sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
       rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-      ti[0] = get_worker (ip0, rx_fib_index0, is_output);
+      if (is_in2out)
+       {
+         ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output);
+       }
+      else
+       {
+         ti[0] = sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output);
+       }
 
       if (ti[0] == thread_index)
        same_worker++;
index e1afea6..405fc84 100644 (file)
 
 #include <nat/nat64.h>
 #include <nat/nat64_db.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vppinfra/crc32.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
+#include <vnet/ip/reass/ip6_sv_reass.h>
 
 
 nat64_main_t nat64_main;
@@ -34,21 +35,25 @@ VNET_FEATURE_INIT (nat64_in2out, static) = {
   .arc_name = "ip6-unicast",
   .node_name = "nat64-in2out",
   .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (nat64_out2in, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat64-out2in",
   .runs_before = VNET_FEATURES ("ip4-lookup"),
+  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
   .arc_name = "ip6-unicast",
   .node_name = "nat64-in2out-handoff",
   .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat64-out2in-handoff",
   .runs_before = VNET_FEATURES ("ip4-lookup"),
+  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
 };
 
 
@@ -120,7 +125,7 @@ nat64_get_worker_in2out (ip6_address_t * addr)
 }
 
 u32
-nat64_get_worker_out2in (ip4_header_t * ip)
+nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
 {
   nat64_main_t *nm = &nat64_main;
   snat_main_t *sm = nm->sm;
@@ -132,41 +137,6 @@ nat64_get_worker_out2in (ip4_header_t * ip)
   udp = ip4_next_header (ip);
   port = udp->dst_port;
 
-  /* fragments */
-  if (PREDICT_FALSE (ip4_is_fragment (ip)))
-    {
-      if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-       return vlib_get_thread_index ();
-
-      nat_reass_ip4_t *reass;
-      reass = nat_ip4_reass_find (ip->src_address, ip->dst_address,
-                                 ip->fragment_id, ip->protocol);
-
-      if (reass && (reass->thread_index != (u32) ~ 0))
-       return reass->thread_index;
-
-      if (ip4_is_first_fragment (ip))
-       {
-         reass =
-           nat_ip4_reass_create (ip->src_address, ip->dst_address,
-                                 ip->fragment_id, ip->protocol);
-         if (!reass)
-           goto no_reass;
-
-         port = clib_net_to_host_u16 (port);
-         if (port > 1024)
-           reass->thread_index =
-             nm->sm->first_worker_index +
-             ((port - 1024) / sm->port_per_thread);
-         else
-           reass->thread_index = vlib_get_thread_index ();
-         return reass->thread_index;
-       }
-      else
-       return vlib_get_thread_index ();
-    }
-
-no_reass:
   /* unknown protocol */
   if (PREDICT_FALSE (proto == ~0))
     {
@@ -193,10 +163,12 @@ no_reass:
     {
       icmp46_header_t *icmp = (icmp46_header_t *) udp;
       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
-      if (!icmp_is_error_message (icmp))
-       port = echo->identifier;
+      if (!icmp_type_is_error_message
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
+       port = vnet_buffer (b)->ip.reass.l4_src_port;
       else
        {
+         /* if error message, then it's not fragmented and we can access it */
          ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
          proto = ip_proto_to_snat_proto (inner_ip->protocol);
          void *l4_header = ip4_next_header (inner_ip);
@@ -249,15 +221,9 @@ nat64_init (vlib_main_t * vm)
   node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath");
   nm->in2out_slowpath_node_index = node->index;
 
-  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-reass");
-  nm->in2out_reass_node_index = node->index;
-
   node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in");
   nm->out2in_node_index = node->index;
 
-  node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in-reass");
-  nm->out2in_reass_node_index = node->index;
-
   /* set session timeouts to default values */
   nm->udp_timeout = SNAT_UDP_TIMEOUT;
   nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
@@ -528,6 +494,19 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
 
   arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";
 
+  if (is_inside)
+    {
+      int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
+      if (rv)
+       return rv;
+    }
+  else
+    {
+      int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
+      if (rv)
+       return rv;
+    }
+
   return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
                                      is_add, 0, 0);
 }
index 2bca1f9..a95ded2 100644 (file)
@@ -115,10 +115,8 @@ typedef struct
 
   u32 in2out_node_index;
   u32 in2out_slowpath_node_index;
-  u32 in2out_reass_node_index;
 
   u32 out2in_node_index;
-  u32 out2in_reass_node_index;
 
   ip4_main_t *ip4_main;
   snat_main_t *sm;
@@ -380,7 +378,7 @@ u32 nat64_get_worker_in2out (ip6_address_t * addr);
  *
  * @returns worker thread index.
  */
-u32 nat64_get_worker_out2in (ip4_header_t * ip);
+u32 nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip);
 
 #endif /* __included_nat64_h__ */
 
index 8d1d734..8d4b1a8 100644 (file)
@@ -18,7 +18,6 @@
  */
 
 #include <nat/nat64.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <vnet/ip/ip6_to_ip4.h>
 #include <vnet/fib/fib_table.h>
@@ -47,38 +46,12 @@ format_nat64_in2out_trace (u8 * s, va_list * args)
   return s;
 }
 
-typedef struct
-{
-  u32 sw_if_index;
-  u32 next_index;
-  u8 cached;
-} nat64_in2out_reass_trace_t;
-
-static u8 *
-format_nat64_in2out_reass_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  nat64_in2out_reass_trace_t *t =
-    va_arg (*args, nat64_in2out_reass_trace_t *);
-
-  s =
-    format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
-           t->sw_if_index, t->next_index,
-           t->cached ? "cached" : "translated");
-
-  return s;
-}
-
-
 #define foreach_nat64_in2out_error                       \
 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
 _(IN2OUT_PACKETS, "good in2out packets processed")       \
 _(NO_TRANSLATION, "no translation")                      \
 _(UNKNOWN, "unknown")                                    \
 _(DROP_FRAGMENT, "drop fragment")                        \
-_(MAX_REASS, "maximum reassemblies exceeded")            \
-_(MAX_FRAG, "maximum fragments per reassembly exceeded") \
 _(TCP_PACKETS, "TCP packets")                            \
 _(UDP_PACKETS, "UDP packets")                            \
 _(ICMP_PACKETS, "ICMP packets")                          \
@@ -108,7 +81,6 @@ typedef enum
   NAT64_IN2OUT_NEXT_IP6_LOOKUP,
   NAT64_IN2OUT_NEXT_DROP,
   NAT64_IN2OUT_NEXT_SLOWPATH,
-  NAT64_IN2OUT_NEXT_REASS,
   NAT64_IN2OUT_N_NEXT,
 } nat64_in2out_next_t;
 
@@ -165,32 +137,75 @@ is_hairpinning (ip6_address_t * dst_addr)
 }
 
 static int
-nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
-                            void *arg)
+nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset,
+                     u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx)
 {
+  ip6_header_t *ip6;
+  ip_csum_t csum = 0;
+  ip4_header_t *ip4;
+  u16 fragment_id;
+  u8 frag_more;
+  u16 frag_offset;
   nat64_main_t *nm = &nat64_main;
-  nat64_in2out_set_ctx_t *ctx = arg;
   nat64_db_bib_entry_t *bibe;
   nat64_db_st_entry_t *ste;
-  ip46_address_t saddr, daddr;
+  ip46_address_t old_saddr, old_daddr;
+  ip4_address_t new_daddr;
   u32 sw_if_index, fib_index;
-  udp_header_t *udp = ip6_next_header (ip6);
-  u8 proto = ip6->protocol;
-  u16 sport = udp->src_port;
-  u16 dport = udp->dst_port;
+  u8 proto = vnet_buffer (p)->ip.reass.ip_proto;
+  u16 sport = vnet_buffer (p)->ip.reass.l4_src_port;
+  u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port;
   nat64_db_t *db = &nm->db[ctx->thread_index];
 
+  ip6 = vlib_buffer_get_current (p);
+
+  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+  ip4 = vlib_buffer_get_current (p);
+
+  u32 ip_version_traffic_class_and_flow_label =
+    ip6->ip_version_traffic_class_and_flow_label;
+  u16 payload_length = ip6->payload_length;
+  u8 hop_limit = ip6->hop_limit;
+
+  old_saddr.as_u64[0] = ip6->src_address.as_u64[0];
+  old_saddr.as_u64[1] = ip6->src_address.as_u64[1];
+  old_daddr.as_u64[0] = ip6->dst_address.as_u64[0];
+  old_daddr.as_u64[1] = ip6->dst_address.as_u64[1];
+
+  if (PREDICT_FALSE (frag_hdr_offset))
+    {
+      //Only the first fragment
+      ip6_frag_hdr_t *hdr =
+       (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
+      fragment_id = frag_id_6to4 (hdr->identification);
+      frag_more = ip6_frag_hdr_more (hdr);
+      frag_offset = ip6_frag_hdr_offset (hdr);
+    }
+  else
+    {
+      fragment_id = 0;
+      frag_offset = 0;
+      frag_more = 0;
+    }
+
+  ip4->ip_version_and_header_length =
+    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label);
+  ip4->length =
+    u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+  ip4->fragment_id = fragment_id;
+  ip4->flags_and_fragment_offset =
+    clib_host_to_net_u16 (frag_offset |
+                         (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
+  ip4->ttl = hop_limit;
+  ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto;
+
   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
   fib_index =
     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
 
-  saddr.as_u64[0] = ip6->src_address.as_u64[0];
-  saddr.as_u64[1] = ip6->src_address.as_u64[1];
-  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
-  daddr.as_u64[1] = ip6->dst_address.as_u64[1];
-
   ste =
-    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
+    nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto,
                            fib_index, 1);
 
   if (ste)
@@ -201,7 +216,8 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
     }
   else
     {
-      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
+      bibe =
+       nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1);
 
       if (!bibe)
        {
@@ -214,7 +230,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
 
          bibe =
            nat64_db_bib_entry_create (ctx->thread_index, db,
-                                      &ip6->src_address, &out_addr, sport,
+                                      &old_saddr.ip6, &out_addr, sport,
                                       out_port, fib_index, proto, 0);
          if (!bibe)
            return -1;
@@ -223,10 +239,10 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
                                   db->bib.bib_entries_num);
        }
 
-      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
+      nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index);
       ste =
        nat64_db_st_entry_create (ctx->thread_index, db, bibe,
-                                 &ip6->dst_address, &daddr.ip4, dport);
+                                 &old_daddr.ip6, &new_daddr, dport);
       if (!ste)
        return -1;
 
@@ -235,22 +251,36 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
     }
 
   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
-  udp->src_port = bibe->out_port;
-
   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
 
-  if (proto == IP_PROTOCOL_TCP)
+  ip4->checksum = ip4_header_checksum (ip4);
+
+  if (!vnet_buffer (p)->ip.reass.is_non_first_fragment)
     {
-      u16 *checksum;
-      ip_csum_t csum;
-      tcp_header_t *tcp = ip6_next_header (ip6);
+      udp_header_t *udp = (udp_header_t *) (ip4 + 1);
+      udp->src_port = bibe->out_port;
 
-      nat64_tcp_session_set_state (ste, tcp, 1);
-      checksum = &tcp->checksum;
-      csum = ip_csum_sub_even (*checksum, sport);
-      csum = ip_csum_add_even (csum, udp->src_port);
-      mss_clamping (nm->sm, tcp, &csum);
-      *checksum = ip_csum_fold (csum);
+      //UDP checksum is optional over IPv4
+      if (proto == IP_PROTOCOL_UDP)
+       {
+         udp->checksum = 0;
+       }
+      else
+       {
+         tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1);
+         csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]);
+         csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]);
+         csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]);
+         csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]);
+         csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
+         csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
+         csum = ip_csum_sub_even (csum, sport);
+         csum = ip_csum_add_even (csum, udp->src_port);
+         mss_clamping (nm->sm, tcp, &csum);
+         tcp->checksum = ip_csum_fold (csum);
+
+         nat64_tcp_session_set_state (ste, tcp, 1);
+       }
     }
 
   nat64_session_reset_timeout (ste, ctx->vm);
@@ -480,16 +510,43 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
 }
 
 static int
-nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
-                              void *arg)
+nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol,
+                       u16 l4_offset, u16 frag_hdr_offset,
+                       nat64_in2out_set_ctx_t * s_ctx)
 {
+  ip6_header_t *ip6;
+  ip4_header_t *ip4;
+  u16 fragment_id;
+  u16 frag_offset;
+  u8 frag_more;
+
+  ip6 = vlib_buffer_get_current (p);
+
+  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+
+  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+  if (PREDICT_FALSE (frag_hdr_offset))
+    {
+      //Only the first fragment
+      ip6_frag_hdr_t *hdr =
+       (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
+      fragment_id = frag_id_6to4 (hdr->identification);
+      frag_offset = ip6_frag_hdr_offset (hdr);
+      frag_more = ip6_frag_hdr_more (hdr);
+    }
+  else
+    {
+      fragment_id = 0;
+      frag_offset = 0;
+      frag_more = 0;
+    }
+
   nat64_main_t *nm = &nat64_main;
-  nat64_in2out_set_ctx_t *s_ctx = arg;
   nat64_db_bib_entry_t *bibe;
   nat64_db_st_entry_t *ste;
   ip46_address_t saddr, daddr, addr;
   u32 sw_if_index, fib_index;
-  u8 proto = ip6->protocol;
   int i;
   nat64_db_t *db = &nm->db[s_ctx->thread_index];
 
@@ -503,17 +560,19 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
 
   ste =
-    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
+    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index,
+                           1);
 
   if (ste)
     {
-      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
+      bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index);
       if (!bibe)
        return -1;
     }
   else
     {
-      bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
+      bibe =
+       nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1);
 
       if (!bibe)
        {
@@ -525,7 +584,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
            .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
            .out_addr.as_u32 = 0,
            .fib_index = fib_index,
-           .proto = proto,
+           .proto = l4_protocol,
            .thread_index = s_ctx->thread_index,
          };
 
@@ -537,7 +596,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
          /* Verify if out address is not already in use for protocol */
          clib_memset (&addr, 0, sizeof (addr));
          addr.ip4.as_u32 = ctx.out_addr.as_u32;
-         if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
+         if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0))
            ctx.out_addr.as_u32 = 0;
 
          if (!ctx.out_addr.as_u32)
@@ -545,7 +604,8 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
              for (i = 0; i < vec_len (nm->addr_pool); i++)
                {
                  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
-                 if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
+                 if (!nat64_db_bib_entry_find
+                     (db, &addr, 0, l4_protocol, 0, 0))
                    break;
                }
            }
@@ -556,7 +616,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
          bibe =
            nat64_db_bib_entry_create (s_ctx->thread_index, db,
                                       &ip6->src_address, &ctx.out_addr,
-                                      0, 0, fib_index, proto, 0);
+                                      0, 0, fib_index, l4_protocol, 0);
          if (!bibe)
            return -1;
 
@@ -580,27 +640,39 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
 
+  ip4->ip_version_and_header_length =
+    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
+  ip4->length = u16_net_add (ip6->payload_length,
+                            sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+  ip4->fragment_id = fragment_id;
+  ip4->flags_and_fragment_offset =
+    clib_host_to_net_u16 (frag_offset |
+                         (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
+  ip4->ttl = ip6->hop_limit;
+  ip4->protocol = l4_protocol;
+  ip4->checksum = ip4_header_checksum (ip4);
+
   return 0;
 }
 
-
-
 static int
 nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
-                                 ip6_header_t * ip6, u32 thread_index)
+                                 ip6_header_t * ip6, u32 l4_offset,
+                                 u32 thread_index)
 {
   nat64_main_t *nm = &nat64_main;
   nat64_db_bib_entry_t *bibe;
   nat64_db_st_entry_t *ste;
   ip46_address_t saddr, daddr;
   u32 sw_if_index, fib_index;
-  udp_header_t *udp = ip6_next_header (ip6);
-  tcp_header_t *tcp = ip6_next_header (ip6);
-  u8 proto = ip6->protocol;
-  u16 sport = udp->src_port;
-  u16 dport = udp->dst_port;
-  u16 *checksum;
-  ip_csum_t csum;
+  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
+  tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
+  u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
+  u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
+  u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
+  u16 *checksum = NULL;
+  ip_csum_t csum = 0;
   nat64_db_t *db = &nm->db[thread_index];
 
   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
@@ -612,17 +684,17 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
 
-  if (proto == IP_PROTOCOL_UDP)
-    checksum = &udp->checksum;
-  else
-    checksum = &tcp->checksum;
-
-  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
-  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
-  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
-  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
-  csum = ip_csum_sub_even (csum, sport);
-  csum = ip_csum_sub_even (csum, dport);
+  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+    {
+      if (proto == IP_PROTOCOL_UDP)
+       checksum = &udp->checksum;
+      else
+       checksum = &tcp->checksum;
+      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+    }
 
   ste =
     nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
@@ -674,7 +746,11 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
 
   nat64_session_reset_timeout (ste, vm);
 
-  sport = udp->src_port = bibe->out_port;
+  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+    {
+      udp->src_port = bibe->out_port;
+    }
+
   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
 
   clib_memset (&daddr, 0, sizeof (daddr));
@@ -696,15 +772,20 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
 
   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
-  udp->dst_port = bibe->in_port;
 
-  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
-  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
-  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
-  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
-  csum = ip_csum_add_even (csum, udp->src_port);
-  csum = ip_csum_add_even (csum, udp->dst_port);
-  *checksum = ip_csum_fold (csum);
+  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+    {
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+      csum = ip_csum_sub_even (csum, sport);
+      csum = ip_csum_sub_even (csum, dport);
+      udp->dst_port = bibe->in_port;
+      csum = ip_csum_add_even (csum, udp->src_port);
+      csum = ip_csum_add_even (csum, udp->dst_port);
+      *checksum = ip_csum_fold (csum);
+    }
 
   return 0;
 }
@@ -990,7 +1071,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          vlib_buffer_t *b0;
          u32 next0;
          ip6_header_t *ip60;
-         u16 l4_offset0, frag_offset0;
+         u16 l4_offset0, frag_hdr_offset0;
          u8 l4_protocol0;
          u32 proto0;
          nat64_in2out_set_ctx_t ctx0;
@@ -1015,8 +1096,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          if (PREDICT_FALSE
              (ip6_parse
-              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
-               &frag_offset0)))
+              (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+               &frag_hdr_offset0)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
@@ -1051,7 +1132,9 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                      goto trace0;
                    }
 
-                 if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
+                 if (nat64_in2out_unk_proto
+                     (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0,
+                      &ctx0))
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
@@ -1070,14 +1153,6 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                }
            }
 
-         if (PREDICT_FALSE
-             (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
-           {
-             next0 = NAT64_IN2OUT_NEXT_REASS;
-             fragments++;
-             goto trace0;
-           }
-
          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              icmp_packets++;
@@ -1095,7 +1170,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                }
 
              if (icmp6_to_icmp
-                 (b0, nat64_in2out_icmp_set_cb, &ctx0,
+                 (vm, b0, nat64_in2out_icmp_set_cb, &ctx0,
                   nat64_in2out_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
@@ -1114,7 +1189,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                {
                  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
                  if (nat64_in2out_tcp_udp_hairpinning
-                     (vm, b0, ip60, thread_index))
+                     (vm, b0, ip60, l4_offset0, thread_index))
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
@@ -1123,8 +1198,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                  goto trace0;
                }
 
-             if (ip6_to_ip4_tcp_udp
-                 (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
+             if (nat64_in2out_tcp_udp
+                 (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
@@ -1191,7 +1266,6 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
-    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1218,7 +1292,6 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
-    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1233,447 +1306,6 @@ typedef struct nat64_in2out_frag_set_ctx_t_
   u8 first_frag;
 } nat64_in2out_frag_set_ctx_t;
 
-static int
-nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
-{
-  nat64_main_t *nm = &nat64_main;
-  nat64_in2out_frag_set_ctx_t *ctx = arg;
-  nat64_db_st_entry_t *ste;
-  nat64_db_bib_entry_t *bibe;
-  udp_header_t *udp;
-  nat64_db_t *db = &nm->db[ctx->thread_index];
-
-  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
-  if (!ste)
-    return -1;
-
-  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
-  if (!bibe)
-    return -1;
-
-  nat64_session_reset_timeout (ste, ctx->vm);
-
-  if (ctx->first_frag)
-    {
-      udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
-
-      if (ctx->proto == IP_PROTOCOL_TCP)
-       {
-         u16 *checksum;
-         ip_csum_t csum;
-         tcp_header_t *tcp = (tcp_header_t *) udp;
-
-         nat64_tcp_session_set_state (ste, tcp, 1);
-         checksum = &tcp->checksum;
-         csum = ip_csum_sub_even (*checksum, tcp->src_port);
-         csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
-         csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
-         csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
-         csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
-         csum = ip_csum_add_even (csum, bibe->out_port);
-         csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
-         csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
-         *checksum = ip_csum_fold (csum);
-       }
-
-      udp->src_port = bibe->out_port;
-    }
-
-  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
-  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
-
-  return 0;
-}
-
-static int
-nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
-                              nat64_in2out_frag_set_ctx_t * ctx)
-{
-  nat64_main_t *nm = &nat64_main;
-  nat64_db_st_entry_t *ste;
-  nat64_db_bib_entry_t *bibe;
-  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
-  tcp_header_t *tcp = (tcp_header_t *) udp;
-  u16 sport = udp->src_port;
-  u16 dport = udp->dst_port;
-  u16 *checksum;
-  ip_csum_t csum;
-  ip46_address_t daddr;
-  nat64_db_t *db = &nm->db[ctx->thread_index];
-
-  if (ctx->first_frag)
-    {
-      if (ctx->proto == IP_PROTOCOL_UDP)
-       checksum = &udp->checksum;
-      else
-       checksum = &tcp->checksum;
-
-      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
-      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
-      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
-      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
-      csum = ip_csum_sub_even (csum, sport);
-      csum = ip_csum_sub_even (csum, dport);
-    }
-
-  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
-  if (!ste)
-    return -1;
-
-  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
-  if (!bibe)
-    return -1;
-
-  if (ctx->proto == IP_PROTOCOL_TCP)
-    nat64_tcp_session_set_state (ste, tcp, 1);
-
-  nat64_session_reset_timeout (ste, ctx->vm);
-
-  sport = bibe->out_port;
-  dport = ste->r_port;
-
-  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
-
-  clib_memset (&daddr, 0, sizeof (daddr));
-  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
-
-  bibe = 0;
-  /* *INDENT-OFF* */
-  vec_foreach (db, nm->db)
-    {
-      bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);
-
-      if (bibe)
-       break;
-    }
-  /* *INDENT-ON* */
-
-  if (!bibe)
-    return -1;
-
-  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
-  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
-
-  if (ctx->first_frag)
-    {
-      udp->dst_port = bibe->in_port;
-      udp->src_port = sport;
-      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
-      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
-      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
-      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
-      csum = ip_csum_add_even (csum, udp->src_port);
-      csum = ip_csum_add_even (csum, udp->dst_port);
-      *checksum = ip_csum_fold (csum);
-    }
-
-  return 0;
-}
-
-VLIB_NODE_FN (nat64_in2out_reass_node) (vlib_main_t * vm,
-                                       vlib_node_runtime_t * node,
-                                       vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, *to_next;
-  nat64_in2out_next_t next_index;
-  u32 pkts_processed = 0, cached_fragments = 0;
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-  nat64_main_t *nm = &nat64_main;
-  u32 thread_index = vm->thread_index;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         u8 cached0 = 0;
-         ip6_header_t *ip60;
-         u16 l4_offset0, frag_offset0;
-         u8 l4_protocol0;
-         nat_reass_ip6_t *reass0;
-         ip6_frag_hdr_t *frag0;
-         nat64_db_bib_entry_t *bibe0;
-         nat64_db_st_entry_t *ste0;
-         udp_header_t *udp0;
-         snat_protocol_t proto0;
-         u32 sw_if_index0, fib_index0;
-         ip46_address_t saddr0, daddr0;
-         nat64_in2out_frag_set_ctx_t ctx0;
-         nat64_db_t *db = &nm->db[thread_index];
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
-                                                sw_if_index0);
-
-         ctx0.thread_index = thread_index;
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
-
-         if (PREDICT_FALSE
-             (ip6_parse
-              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
-               &frag_offset0)))
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE
-             (!(l4_protocol0 == IP_PROTOCOL_TCP
-                || l4_protocol0 == IP_PROTOCOL_UDP)))
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
-         frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
-         proto0 = ip_proto_to_snat_proto (l4_protocol0);
-
-         reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
-                                                ip60->dst_address,
-                                                frag0->identification,
-                                                l4_protocol0,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = NAT64_IN2OUT_NEXT_DROP;
-             b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
-             goto trace0;
-           }
-
-         if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
-           {
-             ctx0.first_frag = 0;
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip6_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
-                     next0 = NAT64_IN2OUT_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-           }
-         else
-           {
-             ctx0.first_frag = 1;
-
-             saddr0.as_u64[0] = ip60->src_address.as_u64[0];
-             saddr0.as_u64[1] = ip60->src_address.as_u64[1];
-             daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
-             daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
-
-             ste0 =
-               nat64_db_st_entry_find (db, &saddr0, &daddr0,
-                                       udp0->src_port, udp0->dst_port,
-                                       l4_protocol0, fib_index0, 1);
-             if (!ste0)
-               {
-                 bibe0 =
-                   nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
-                                            l4_protocol0, fib_index0, 1);
-                 if (!bibe0)
-                   {
-                     u16 out_port0;
-                     ip4_address_t out_addr0;
-                     if (nat64_alloc_out_addr_and_port
-                         (fib_index0, proto0, &out_addr0, &out_port0,
-                          thread_index))
-                       {
-                         next0 = NAT64_IN2OUT_NEXT_DROP;
-                         b0->error =
-                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
-                         goto trace0;
-                       }
-
-                     bibe0 =
-                       nat64_db_bib_entry_create (thread_index, db,
-                                                  &ip60->src_address,
-                                                  &out_addr0, udp0->src_port,
-                                                  out_port0, fib_index0,
-                                                  l4_protocol0, 0);
-                     if (!bibe0)
-                       {
-                         next0 = NAT64_IN2OUT_NEXT_DROP;
-                         b0->error =
-                           node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
-                         goto trace0;
-                       }
-                     vlib_set_simple_counter (&nm->total_bibs, thread_index,
-                                              0, db->bib.bib_entries_num);
-                   }
-                 nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
-                                    fib_index0);
-                 ste0 =
-                   nat64_db_st_entry_create (thread_index, db, bibe0,
-                                             &ip60->dst_address, &daddr0.ip4,
-                                             udp0->dst_port);
-                 if (!ste0)
-                   {
-                     next0 = NAT64_IN2OUT_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
-                     goto trace0;
-                   }
-
-                 vlib_set_simple_counter (&nm->total_sessions, thread_index,
-                                          0, db->st.st_entries_num);
-               }
-             reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
-
-             nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-
-         ctx0.sess_index = reass0->sess_index;
-         ctx0.proto = l4_protocol0;
-         ctx0.vm = vm;
-         ctx0.l4_offset = l4_offset0;
-
-         if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
-           {
-             next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
-             if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
-               {
-                 next0 = NAT64_IN2OUT_NEXT_DROP;
-                 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
-               }
-             goto trace0;
-           }
-         else
-           {
-             if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
-               {
-                 next0 = NAT64_IN2OUT_NEXT_DROP;
-                 b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
-                 goto trace0;
-               }
-           }
-
-       trace0:
-         if (PREDICT_FALSE
-             ((node->flags & VLIB_NODE_FLAG_TRACE)
-              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat64_in2out_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-             cached_fragments++;
-           }
-         else
-           {
-             pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, nm->in2out_reass_node_index,
-                              NAT64_IN2OUT_ERROR_PROCESSED_FRAGMENTS,
-                              pkts_processed);
-  vlib_node_increment_counter (vm, nm->in2out_reass_node_index,
-                              NAT64_IN2OUT_ERROR_CACHED_FRAGMENTS,
-                              cached_fragments);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
-                       NAT64_IN2OUT_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
-  .name = "nat64-in2out-reass",
-  .vector_size = sizeof (u32),
-  .format_trace = format_nat64_in2out_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
-  .error_strings = nat64_in2out_error_strings,
-  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
-  /* edit / add dispositions here */
-  .next_nodes = {
-    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
-    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
-    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
-    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
-    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
-  },
-};
-/* *INDENT-ON* */
 
 #define foreach_nat64_in2out_handoff_error                       \
 _(CONGESTION_DROP, "congestion drop")                            \
index e0dd407..6c00751 100644 (file)
@@ -18,7 +18,6 @@
  */
 
 #include <nat/nat64.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <vnet/ip/ip4_to_ip6.h>
 #include <vnet/fib/ip4_fib.h>
@@ -44,38 +43,12 @@ format_nat64_out2in_trace (u8 * s, va_list * args)
   return s;
 }
 
-typedef struct
-{
-  u32 sw_if_index;
-  u32 next_index;
-  u8 cached;
-} nat64_out2in_reass_trace_t;
-
-static u8 *
-format_nat64_out2in_reass_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  nat64_out2in_reass_trace_t *t =
-    va_arg (*args, nat64_out2in_reass_trace_t *);
-
-  s =
-    format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
-           t->sw_if_index, t->next_index,
-           t->cached ? "cached" : "translated");
-
-  return s;
-}
-
-
 #define foreach_nat64_out2in_error                       \
 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
 _(OUT2IN_PACKETS, "good out2in packets processed")       \
 _(NO_TRANSLATION, "no translation")                      \
 _(UNKNOWN, "unknown")                                    \
 _(DROP_FRAGMENT, "drop fragment")                        \
-_(MAX_REASS, "maximum reassemblies exceeded")            \
-_(MAX_FRAG, "maximum fragments per reassembly exceeded") \
 _(TCP_PACKETS, "TCP packets")                            \
 _(UDP_PACKETS, "UDP packets")                            \
 _(ICMP_PACKETS, "ICMP packets")                          \
@@ -104,7 +77,6 @@ typedef enum
   NAT64_OUT2IN_NEXT_IP6_LOOKUP,
   NAT64_OUT2IN_NEXT_IP4_LOOKUP,
   NAT64_OUT2IN_NEXT_DROP,
-  NAT64_OUT2IN_NEXT_REASS,
   NAT64_OUT2IN_N_NEXT,
 } nat64_out2in_next_t;
 
@@ -116,25 +88,90 @@ typedef struct nat64_out2in_set_ctx_t_
 } nat64_out2in_set_ctx_t;
 
 static int
-nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
-                            void *arg)
+nat64_out2in_tcp_udp (vlib_main_t * vm, vlib_buffer_t * b,
+                     nat64_out2in_set_ctx_t * ctx)
 {
+  ip4_header_t *ip4;
+  ip6_header_t *ip6;
+  ip_csum_t csum;
+  u16 *checksum = NULL;
+  ip6_frag_hdr_t *frag;
+  u32 frag_id;
+  ip4_address_t old_src, old_dst;
+
   nat64_main_t *nm = &nat64_main;
-  nat64_out2in_set_ctx_t *ctx = arg;
   nat64_db_bib_entry_t *bibe;
   nat64_db_st_entry_t *ste;
-  ip46_address_t saddr, daddr;
+  ip46_address_t saddr;
+  ip46_address_t daddr;
   ip6_address_t ip6_saddr;
-  udp_header_t *udp = ip4_next_header (ip4);
-  tcp_header_t *tcp = ip4_next_header (ip4);
-  u8 proto = ip4->protocol;
-  u16 dport = udp->dst_port;
-  u16 sport = udp->src_port;
+  u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
+  u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
+  u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
   u32 sw_if_index, fib_index;
-  u16 *checksum;
-  ip_csum_t csum;
   nat64_db_t *db = &nm->db[ctx->thread_index];
 
+  ip4 = vlib_buffer_get_current (b);
+
+  udp_header_t *udp = ip4_next_header (ip4);
+  tcp_header_t *tcp = ip4_next_header (ip4);
+  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+    {
+      if (ip4->protocol == IP_PROTOCOL_UDP)
+       {
+         checksum = &udp->checksum;
+         //UDP checksum is optional over IPv4 but mandatory for IPv6
+         //We do not check udp->length sanity but use our safe computed value instead
+         if (PREDICT_FALSE (!*checksum))
+           {
+             u16 udp_len =
+               clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
+             csum = ip_incremental_checksum (0, udp, udp_len);
+             csum =
+               ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+             csum =
+               ip_csum_with_carry (csum,
+                                   clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+             csum =
+               ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
+             *checksum = ~ip_csum_fold (csum);
+           }
+       }
+      else
+       {
+         checksum = &tcp->checksum;
+       }
+    }
+
+  old_src.as_u32 = ip4->src_address.as_u32;
+  old_dst.as_u32 = ip4->dst_address.as_u32;
+
+  // Deal with fragmented packets
+  u16 frag_offset = ip4_get_fragment_offset (ip4);
+  if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset))
+    {
+      ip6 =
+       (ip6_header_t *) u8_ptr_add (ip4,
+                                    sizeof (*ip4) - sizeof (*ip6) -
+                                    sizeof (*frag));
+      frag =
+       (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+      frag_id = frag_id_4to6 (ip4->fragment_id);
+      vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+    }
+  else
+    {
+      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+      vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6));
+      frag = NULL;
+    }
+
+  ip6->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+  ip6->hop_limit = ip4->ttl;
+  ip6->protocol = ip4->protocol;
+
   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
@@ -159,7 +196,7 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
       if (!bibe)
        return -1;
 
-      nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
+      nat64_compose_ip6 (&ip6_saddr, &old_src, bibe->fib_index);
       ste =
        nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr,
                                  &saddr.ip4, sport);
@@ -176,29 +213,48 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
 
   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
-  udp->dst_port = bibe->in_port;
 
-  if (proto == IP_PROTOCOL_UDP)
-    checksum = &udp->checksum;
-  else
+  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+  nat64_session_reset_timeout (ste, ctx->vm);
+
+  if (PREDICT_FALSE (frag != NULL))
     {
-      checksum = &tcp->checksum;
-      nat64_tcp_session_set_state (ste, tcp, 0);
+      frag->next_hdr = ip6->protocol;
+      frag->identification = frag_id;
+      frag->rsv = 0;
+      frag->fragment_offset_and_more =
+       ip6_frag_hdr_offset_and_more (frag_offset, 1);
+      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
     }
 
-  csum = ip_csum_sub_even (*checksum, dport);
-  csum = ip_csum_add_even (csum, udp->dst_port);
-  *checksum = ip_csum_fold (csum);
+  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+    {
+      udp->dst_port = bibe->in_port;
 
-  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+      if (proto == IP_PROTOCOL_TCP)
+       {
+         nat64_tcp_session_set_state (ste, tcp, 0);
+       }
 
-  nat64_session_reset_timeout (ste, ctx->vm);
+      csum = ip_csum_sub_even (*checksum, dport);
+      csum = ip_csum_add_even (csum, udp->dst_port);
+      csum = ip_csum_sub_even (csum, old_src.as_u32);
+      csum = ip_csum_sub_even (csum, old_dst.as_u32);
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+      *checksum = ip_csum_fold (csum);
+    }
 
   return 0;
 }
 
 static int
-nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+nat64_out2in_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+                         ip6_header_t * ip6, void *arg)
 {
   nat64_main_t *nm = &nat64_main;
   nat64_out2in_set_ctx_t *ctx = arg;
@@ -278,8 +334,8 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
 }
 
 static int
-nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
-                               void *arg)
+nat64_out2in_inner_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+                               ip6_header_t * ip6, void *arg)
 {
   nat64_main_t *nm = &nat64_main;
   nat64_out2in_set_ctx_t *ctx = arg;
@@ -370,11 +426,15 @@ nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
 }
 
 static int
-nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
-                              void *arg)
+nat64_out2in_unk_proto (vlib_main_t * vm, vlib_buffer_t * p,
+                       nat64_out2in_set_ctx_t * ctx)
 {
+  ip4_header_t *ip4 = vlib_buffer_get_current (p);
+  ip6_header_t *ip6;
+  ip6_frag_hdr_t *frag;
+  u32 frag_id;
+
   nat64_main_t *nm = &nat64_main;
-  nat64_out2in_set_ctx_t *ctx = arg;
   nat64_db_bib_entry_t *bibe;
   nat64_db_st_entry_t *ste;
   ip46_address_t saddr, daddr;
@@ -383,6 +443,43 @@ nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
   u8 proto = ip4->protocol;
   nat64_db_t *db = &nm->db[ctx->thread_index];
 
+  // Deal with fragmented packets
+  u16 frag_offset = ip4_get_fragment_offset (ip4);
+  if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset))
+    {
+      ip6 =
+       (ip6_header_t *) u8_ptr_add (ip4,
+                                    sizeof (*ip4) - sizeof (*ip6) -
+                                    sizeof (*frag));
+      frag =
+       (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+      frag_id = frag_id_4to6 (ip4->fragment_id);
+      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+    }
+  else
+    {
+      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+      frag = NULL;
+    }
+
+  ip6->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+  ip6->hop_limit = ip4->ttl;
+  ip6->protocol = ip4->protocol;
+
+  if (PREDICT_FALSE (frag != NULL))
+    {
+      frag->next_hdr = ip6->protocol;
+      frag->identification = frag_id;
+      frag->rsv = 0;
+      frag->fragment_offset_and_more =
+       ip6_frag_hdr_offset_and_more (frag_offset, 1);
+      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
+    }
+
   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
@@ -482,7 +579,7 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
 
          if (PREDICT_FALSE (proto0 == ~0))
            {
-             if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
+             if (nat64_out2in_unk_proto (vm, b0, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
@@ -491,13 +588,6 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
              goto trace0;
            }
 
-         if (PREDICT_FALSE (ip4_is_fragment (ip40)))
-           {
-             next0 = NAT64_OUT2IN_NEXT_REASS;
-             fragments++;
-             goto trace0;
-           }
-
          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              icmp_packets++;
@@ -517,7 +607,7 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
              else
                udp_packets++;
 
-             if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
+             if (nat64_out2in_tcp_udp (vm, b0, &ctx0))
                {
                  udp0 = ip4_next_header (ip40);
                  /*
@@ -587,7 +677,6 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = {
     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
     [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
     [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
-    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
   },
 };
 /* *INDENT-ON* */
@@ -602,350 +691,6 @@ typedef struct nat64_out2in_frag_set_ctx_t_
   u8 first_frag;
 } nat64_out2in_frag_set_ctx_t;
 
-static int
-nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
-{
-  nat64_main_t *nm = &nat64_main;
-  nat64_out2in_frag_set_ctx_t *ctx = arg;
-  nat64_db_st_entry_t *ste;
-  nat64_db_bib_entry_t *bibe;
-  udp_header_t *udp = ip4_next_header (ip4);
-  ip_csum_t csum;
-  u16 *checksum;
-  nat64_db_t *db = &nm->db[ctx->thread_index];
-
-  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
-  if (!ste)
-    return -1;
-
-  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
-  if (!bibe)
-    return -1;
-
-  if (ctx->first_frag)
-    {
-      udp->dst_port = bibe->in_port;
-
-      if (ip4->protocol == IP_PROTOCOL_UDP)
-       {
-         checksum = &udp->checksum;
-
-         if (!checksum)
-           {
-             u16 udp_len =
-               clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
-             csum = ip_incremental_checksum (0, udp, udp_len);
-             csum =
-               ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
-             csum =
-               ip_csum_with_carry (csum,
-                                   clib_host_to_net_u16 (IP_PROTOCOL_UDP));
-             csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
-             csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
-             csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
-             csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
-             *checksum = ~ip_csum_fold (csum);
-           }
-         else
-           {
-             csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
-             csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
-             csum = ip_csum_sub_even (csum, bibe->out_port);
-             csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
-             csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
-             csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
-             csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
-             csum = ip_csum_add_even (csum, bibe->in_port);
-             *checksum = ip_csum_fold (csum);
-           }
-       }
-      else
-       {
-         tcp_header_t *tcp = ip4_next_header (ip4);
-         nat64_tcp_session_set_state (ste, tcp, 0);
-         checksum = &tcp->checksum;
-         csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
-         csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
-         csum = ip_csum_sub_even (csum, bibe->out_port);
-         csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
-         csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
-         csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
-         csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
-         csum = ip_csum_add_even (csum, bibe->in_port);
-         *checksum = ip_csum_fold (csum);
-       }
-
-    }
-
-  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
-  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
-
-  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
-  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
-
-  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
-
-  nat64_session_reset_timeout (ste, ctx->vm);
-
-  return 0;
-}
-
-VLIB_NODE_FN (nat64_out2in_reass_node) (vlib_main_t * vm,
-                                       vlib_node_runtime_t * node,
-                                       vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, *to_next;
-  nat64_out2in_next_t next_index;
-  u32 pkts_processed = 0, cached_fragments = 0;
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-  nat64_main_t *nm = &nat64_main;
-  u32 thread_index = vm->thread_index;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         ip4_header_t *ip40;
-         u8 cached0 = 0;
-         u32 sw_if_index0, fib_index0;
-         udp_header_t *udp0;
-         nat_reass_ip4_t *reass0;
-         ip46_address_t saddr0, daddr0;
-         nat64_db_st_entry_t *ste0;
-         nat64_db_bib_entry_t *bibe0;
-         ip6_address_t ip6_saddr0;
-         nat64_out2in_frag_set_ctx_t ctx0;
-         nat64_db_t *db = &nm->db[thread_index];
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                sw_if_index0);
-
-         ctx0.thread_index = thread_index;
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
-           {
-             next0 = NAT64_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         ip40 = vlib_buffer_get_current (b0);
-
-         if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
-                              || ip40->protocol == IP_PROTOCOL_UDP)))
-           {
-             next0 = NAT64_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         udp0 = ip4_next_header (ip40);
-
-         reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
-                                                ip40->dst_address,
-                                                ip40->fragment_id,
-                                                ip40->protocol,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = NAT64_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
-           {
-             ctx0.first_frag = 1;
-
-             clib_memset (&saddr0, 0, sizeof (saddr0));
-             saddr0.ip4.as_u32 = ip40->src_address.as_u32;
-             clib_memset (&daddr0, 0, sizeof (daddr0));
-             daddr0.ip4.as_u32 = ip40->dst_address.as_u32;
-
-             ste0 =
-               nat64_db_st_entry_find (db, &daddr0, &saddr0,
-                                       udp0->dst_port, udp0->src_port,
-                                       ip40->protocol, fib_index0, 0);
-             if (!ste0)
-               {
-                 bibe0 =
-                   nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
-                                            ip40->protocol, fib_index0, 0);
-                 if (!bibe0)
-                   {
-                     next0 = NAT64_OUT2IN_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
-                     goto trace0;
-                   }
-
-                 nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
-                                    bibe0->fib_index);
-                 ste0 =
-                   nat64_db_st_entry_create (thread_index,
-                                             db, bibe0, &ip6_saddr0,
-                                             &saddr0.ip4, udp0->src_port);
-
-                 if (!ste0)
-                   {
-                     next0 = NAT64_OUT2IN_NEXT_DROP;
-                     b0->error =
-                       node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
-                     goto trace0;
-                   }
-
-                 vlib_set_simple_counter (&nm->total_sessions, thread_index,
-                                          0, db->st.st_entries_num);
-               }
-             reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
-             reass0->thread_index = thread_index;
-
-             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-         else
-           {
-             ctx0.first_frag = 0;
-
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip4_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
-                     next0 = NAT64_OUT2IN_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-           }
-
-         ctx0.sess_index = reass0->sess_index;
-         ctx0.proto = ip40->protocol;
-         ctx0.vm = vm;
-         ctx0.b = b0;
-
-         if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
-           {
-             next0 = NAT64_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
-             goto trace0;
-           }
-
-       trace0:
-         if (PREDICT_FALSE
-             ((node->flags & VLIB_NODE_FLAG_TRACE)
-              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat64_out2in_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-             cached_fragments++;
-           }
-         else
-           {
-             pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, nm->out2in_reass_node_index,
-                              NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
-                              pkts_processed);
-  vlib_node_increment_counter (vm, nm->out2in_reass_node_index,
-                              NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS,
-                              cached_fragments);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
-                       NAT64_OUT2IN_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
-  .name = "nat64-out2in-reass",
-  .vector_size = sizeof (u32),
-  .format_trace = format_nat64_out2in_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
-  .error_strings = nat64_out2in_error_strings,
-  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
-  /* edit / add dispositions here */
-  .next_nodes = {
-    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
-    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
-    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
-    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
-  },
-};
-/* *INDENT-ON* */
-
 #define foreach_nat64_out2in_handoff_error                       \
 _(CONGESTION_DROP, "congestion drop")                            \
 _(SAME_WORKER, "same worker")                                    \
@@ -1010,7 +755,7 @@ VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm,
       ip4_header_t *ip0;
 
       ip0 = vlib_buffer_get_current (b[0]);
-      ti[0] = nat64_get_worker_out2in (ip0);
+      ti[0] = nat64_get_worker_out2in (b[0], ip0);
 
       if (ti[0] != thread_index)
        do_handoff++;
index e5e783b..3ac773c 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <nat/nat66.h>
 #include <vnet/fib/fib_table.h>
+#include <vnet/ip/reass/ip6_sv_reass.h>
 
 nat66_main_t nat66_main;
 
@@ -29,11 +30,13 @@ VNET_FEATURE_INIT (nat66_in2out, static) = {
   .arc_name = "ip6-unicast",
   .node_name = "nat66-in2out",
   .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
 };
 VNET_FEATURE_INIT (nat66_out2in, static) = {
   .arc_name = "ip6-unicast",
   .node_name = "nat66-out2in",
   .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
 };
 
 /* *INDENT-ON* */
@@ -99,6 +102,9 @@ nat66_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
     }
 
   feature_name = is_inside ? "nat66-in2out" : "nat66-out2in";
+  int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
+  if (rv)
+    return rv;
   return vnet_feature_enable_disable ("ip6-unicast", feature_name,
                                      sw_if_index, is_add, 0, 0);
 }
index ac1f329..437d665 100644 (file)
@@ -156,7 +156,7 @@ VLIB_NODE_FN (nat66_in2out_node) (vlib_main_t * vm,
 
          if (PREDICT_FALSE
              (ip6_parse
-              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+              (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
                &frag_offset0)))
            {
              next0 = NAT66_IN2OUT_NEXT_DROP;
index d404d9f..8386cd3 100644 (file)
@@ -116,7 +116,7 @@ VLIB_NODE_FN (nat66_out2in_node) (vlib_main_t * vm,
 
          if (PREDICT_FALSE
              (ip6_parse
-              (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
+              (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
                &frag_offset0)))
            {
              next0 = NAT66_OUT2IN_NEXT_DROP;
index b83ea0b..6df1a85 100644 (file)
@@ -23,7 +23,6 @@
 #include <nat/nat64.h>
 #include <nat/nat66.h>
 #include <nat/dslite.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_ha.h>
 #include <vlibapi/api.h>
@@ -303,156 +302,6 @@ vl_api_nat_ipfix_enable_disable_t_print (vl_api_nat_ipfix_enable_disable_t *
   FINISH;
 }
 
-static void
-vl_api_nat_set_reass_t_handler (vl_api_nat_set_reass_t * mp)
-{
-  snat_main_t *sm = &snat_main;
-  vl_api_nat_set_reass_reply_t *rmp;
-  int rv = 0;
-
-  rv =
-    nat_reass_set (ntohl (mp->timeout), ntohs (mp->max_reass), mp->max_frag,
-                  mp->drop_frag, mp->is_ip6);
-
-  REPLY_MACRO (VL_API_NAT_SET_REASS_REPLY);
-}
-
-static void *
-vl_api_nat_set_reass_t_print (vl_api_nat_set_reass_t * mp, void *handle)
-{
-  u8 *s;
-
-  s = format (0, "SCRIPT: nat_set_reass ");
-  s = format (s, "timeout %d max_reass %d max_frag %d drop_frag %d is_ip6 %d",
-             clib_host_to_net_u32 (mp->timeout),
-             clib_host_to_net_u16 (mp->max_reass),
-             mp->max_frag, mp->drop_frag, mp->is_ip6);
-
-  FINISH;
-}
-
-static void
-vl_api_nat_get_reass_t_handler (vl_api_nat_get_reass_t * mp)
-{
-  snat_main_t *sm = &snat_main;
-  vl_api_nat_get_reass_reply_t *rmp;
-  int rv = 0;
-
-  /* *INDENT-OFF* */
-  REPLY_MACRO2 (VL_API_NAT_GET_REASS_REPLY,
-  ({
-    rmp->ip4_timeout = htonl (nat_reass_get_timeout(0));
-    rmp->ip4_max_reass = htons (nat_reass_get_max_reass(0));
-    rmp->ip4_max_frag = nat_reass_get_max_frag(0);
-    rmp->ip4_drop_frag = nat_reass_is_drop_frag(0);
-    rmp->ip6_timeout = htonl (nat_reass_get_timeout(1));
-    rmp->ip6_max_reass = htons (nat_reass_get_max_reass(1));
-    rmp->ip6_max_frag = nat_reass_get_max_frag(1);
-    rmp->ip6_drop_frag = nat_reass_is_drop_frag(1);
-  }))
-  /* *INDENT-ON* */
-}
-
-static void *
-vl_api_nat_get_reass_t_print (vl_api_nat_get_reass_t * mp, void *handle)
-{
-  u8 *s;
-
-  s = format (0, "SCRIPT: nat_get_reass");
-
-  FINISH;
-}
-
-typedef struct nat_api_walk_ctx_t_
-{
-  vl_api_registration_t *reg;
-  u32 context;
-} nat_api_walk_ctx_t;
-
-static int
-nat_ip4_reass_walk_api (nat_reass_ip4_t * reass, void *arg)
-{
-  vl_api_nat_reass_details_t *rmp;
-  snat_main_t *sm = &snat_main;
-  nat_api_walk_ctx_t *ctx = arg;
-  ip46_address_t ip_address;
-
-  rmp = vl_msg_api_alloc (sizeof (*rmp));
-  clib_memset (rmp, 0, sizeof (*rmp));
-  rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
-  rmp->context = ctx->context;
-
-  clib_memcpy (&ip_address.ip4, &reass->key.src, 4);
-  ip_address_encode (&ip_address, IP46_TYPE_IP4, &rmp->src_addr);
-
-  clib_memcpy (&ip_address.ip4, &reass->key.dst, 4);
-  ip_address_encode (&ip_address, IP46_TYPE_IP4, &rmp->dst_addr);
-
-  rmp->proto = reass->key.proto;
-  rmp->frag_id = ntohl (reass->key.frag_id);
-  rmp->frag_n = reass->frag_n;
-
-  vl_api_send_msg (ctx->reg, (u8 *) rmp);
-
-  return 0;
-}
-
-static int
-nat_ip6_reass_walk_api (nat_reass_ip6_t * reass, void *arg)
-{
-  vl_api_nat_reass_details_t *rmp;
-  snat_main_t *sm = &snat_main;
-  nat_api_walk_ctx_t *ctx = arg;
-  ip46_address_t ip_address;
-
-  rmp = vl_msg_api_alloc (sizeof (*rmp));
-  clib_memset (rmp, 0, sizeof (*rmp));
-  rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base);
-  rmp->context = ctx->context;
-
-  clib_memcpy (&ip_address.ip6, &reass->key.src, 16);
-  ip_address_encode (&ip_address, IP46_TYPE_IP6, &rmp->src_addr);
-
-  clib_memcpy (&ip_address.ip6, &reass->key.dst, 16);
-  ip_address_encode (&ip_address, IP46_TYPE_IP6, &rmp->dst_addr);
-
-  rmp->proto = reass->key.proto;
-  rmp->frag_id = ntohl (reass->key.frag_id);
-  rmp->frag_n = reass->frag_n;
-
-  vl_api_send_msg (ctx->reg, (u8 *) rmp);
-
-  return 0;
-}
-
-static void
-vl_api_nat_reass_dump_t_handler (vl_api_nat_reass_dump_t * mp)
-{
-  vl_api_registration_t *reg;
-
-  reg = vl_api_client_index_to_registration (mp->client_index);
-  if (!reg)
-    return;
-
-  nat_api_walk_ctx_t ctx = {
-    .reg = reg,
-    .context = mp->context,
-  };
-
-  nat_ip4_reass_walk (nat_ip4_reass_walk_api, &ctx);
-  nat_ip6_reass_walk (nat_ip6_reass_walk_api, &ctx);
-}
-
-static void *
-vl_api_nat_reass_dump_t_print (vl_api_nat_reass_dump_t * mp, void *handle)
-{
-  u8 *s;
-
-  s = format (0, "SCRIPT: nat_reass_dump");
-
-  FINISH;
-}
-
 static void
 vl_api_nat_set_timeouts_t_handler (vl_api_nat_set_timeouts_t * mp)
 {
@@ -3471,9 +3320,6 @@ _(NAT_SET_WORKERS, nat_set_workers)                                     \
 _(NAT_WORKER_DUMP, nat_worker_dump)                                     \
 _(NAT_SET_LOG_LEVEL, nat_set_log_level)                                 \
 _(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable)                   \
-_(NAT_SET_REASS, nat_set_reass)                                         \
-_(NAT_GET_REASS, nat_get_reass)                                         \
-_(NAT_REASS_DUMP, nat_reass_dump)                                       \
 _(NAT_SET_TIMEOUTS, nat_set_timeouts)                                   \
 _(NAT_GET_TIMEOUTS, nat_get_timeouts)                                   \
 _(NAT_SET_ADDR_AND_PORT_ALLOC_ALG, nat_set_addr_and_port_alloc_alg)     \
index 832a2ba..384a1eb 100644 (file)
@@ -121,14 +121,16 @@ icmp_match_in2out_det (snat_main_t * sm, vlib_node_runtime_t * node,
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
     {
       protocol = SNAT_PROTOCOL_ICMP;
       in_addr = ip0->src_address;
-      in_port = echo0->identifier;
+      in_port = vnet_buffer (b0)->ip.reass.l4_src_port;
     }
   else
     {
+      /* if error message, then it's not fragmented and we can access it */
       inner_ip0 = (ip4_header_t *) (echo0 + 1);
       l4_header = ip4_next_header (inner_ip0);
       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
@@ -213,8 +215,10 @@ icmp_match_in2out_det (snat_main_t * sm, vlib_node_runtime_t * node,
        }
     }
 
-  if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request &&
-                    !icmp_is_error_message (icmp0)))
+  if (PREDICT_FALSE
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request
+       && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                      reass.icmp_type_or_tcp_flags)))
     {
       b0->error = node->errors[NAT_DET_IN2OUT_ERROR_BAD_ICMP_TYPE];
       next0 = NAT_DET_IN2OUT_NEXT_DROP;
index c4bd096..74210e1 100644 (file)
@@ -117,16 +117,18 @@ icmp_match_out2in_det (snat_main_t * sm, vlib_node_runtime_t * node,
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
     {
       protocol = SNAT_PROTOCOL_ICMP;
       key0.ext_host_addr = ip0->src_address;
       key0.ext_host_port = 0;
-      key0.out_port = echo0->identifier;
+      key0.out_port = vnet_buffer (b0)->ip.reass.l4_src_port;
       out_addr = ip0->dst_address;
     }
   else
     {
+      /* if error message, then it's not fragmented and we can access it */
       inner_ip0 = (ip4_header_t *) (echo0 + 1);
       l4_header = ip4_next_header (inner_ip0);
       protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
@@ -191,8 +193,10 @@ icmp_match_out2in_det (snat_main_t * sm, vlib_node_runtime_t * node,
       goto out;
     }
 
-  if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply &&
-                    !icmp_is_error_message (icmp0)))
+  if (PREDICT_FALSE
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply
+       && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                      reass.icmp_type_or_tcp_flags)))
     {
       b0->error = node->errors[NAT_DET_OUT2IN_ERROR_BAD_ICMP_TYPE];
       next0 = NAT_DET_OUT2IN_NEXT_DROP;
index 7dcdff6..17f64b9 100644 (file)
@@ -333,20 +333,6 @@ format_det_map_ses (u8 * s, va_list * args)
   return s;
 }
 
-u8 *
-format_nat44_reass_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  nat44_reass_trace_t *t = va_arg (*args, nat44_reass_trace_t *);
-
-  s = format (s, "NAT44_REASS: sw_if_index %d, next index %d, status %s",
-             t->sw_if_index, t->next_index,
-             t->cached ? "cached" : "translated");
-
-  return s;
-}
-
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 2f68ed4..a58317a 100644 (file)
@@ -171,9 +171,9 @@ snat_proto_to_ip_proto (snat_protocol_t snat_proto)
 }
 
 static_always_inline u8
-icmp_is_error_message (icmp46_header_t * icmp)
+icmp_type_is_error_message (u8 icmp_type)
 {
-  switch (icmp->type)
+  switch (icmp_type)
     {
     case ICMP4_destination_unreachable:
     case ICMP4_time_exceeded:
@@ -323,25 +323,28 @@ nat44_delete_session (snat_main_t * sm, snat_session_t * ses,
 */
 always_inline int
 nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses,
-                                tcp_header_t * tcp, u32 thread_index)
+                                vlib_buffer_t * b, u32 thread_index)
 {
-  if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST))
+  u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+  u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
+  u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
+  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
     ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
     ses->state = 0;
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
       (ses->state & NAT44_SES_O2I_SYN))
     ses->state = 0;
-  if (tcp->flags & TCP_FLAG_SYN)
+  if (tcp_flags & TCP_FLAG_SYN)
     ses->state |= NAT44_SES_I2O_SYN;
-  if (tcp->flags & TCP_FLAG_FIN)
+  if (tcp_flags & TCP_FLAG_FIN)
     {
-      ses->i2o_fin_seq = clib_net_to_host_u32 (tcp->seq_number);
+      ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
       ses->state |= NAT44_SES_I2O_FIN;
     }
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
     {
-      if (clib_net_to_host_u32 (tcp->ack_number) > ses->o2i_fin_seq)
+      if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
        ses->state |= NAT44_SES_O2I_FIN_ACK;
     }
   if (nat44_is_ses_closed (ses)
@@ -356,25 +359,26 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses,
 
 always_inline int
 nat44_set_tcp_session_state_o2i (snat_main_t * sm, snat_session_t * ses,
-                                tcp_header_t * tcp, u32 thread_index)
+                                u8 tcp_flags, u32 tcp_ack_number,
+                                u32 tcp_seq_number, u32 thread_index)
 {
-  if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
     ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
     ses->state = 0;
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
       (ses->state & NAT44_SES_O2I_SYN))
     ses->state = 0;
-  if (tcp->flags & TCP_FLAG_SYN)
+  if (tcp_flags & TCP_FLAG_SYN)
     ses->state |= NAT44_SES_O2I_SYN;
-  if (tcp->flags & TCP_FLAG_FIN)
+  if (tcp_flags & TCP_FLAG_FIN)
     {
-      ses->o2i_fin_seq = clib_net_to_host_u32 (tcp->seq_number);
+      ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
       ses->state |= NAT44_SES_O2I_FIN;
     }
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
     {
-      if (clib_net_to_host_u32 (tcp->ack_number) > ses->i2o_fin_seq)
+      if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
        ses->state |= NAT44_SES_I2O_FIN_ACK;
     }
   if (nat44_is_ses_closed (ses))
@@ -466,7 +470,8 @@ make_sm_kv (clib_bihash_kv_8_8_t * kv, ip4_address_t * addr, u8 proto,
 }
 
 static_always_inline int
-get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
+get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0,
+                    nat_ed_ses_key_t * p_key0)
 {
   icmp46_header_t *icmp0;
   nat_ed_ses_key_t key0;
@@ -478,12 +483,13 @@ get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
     {
       key0.proto = IP_PROTOCOL_ICMP;
       key0.l_addr = ip0->src_address;
       key0.r_addr = ip0->dst_address;
-      key0.l_port = echo0->identifier;
+      key0.l_port = vnet_buffer (b)->ip.reass.l4_src_port;     // TODO should this be src or dst?
       key0.r_port = 0;
     }
   else
@@ -516,7 +522,8 @@ get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
 
 
 static_always_inline int
-get_icmp_o2i_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
+get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0,
+                    nat_ed_ses_key_t * p_key0)
 {
   icmp46_header_t *icmp0;
   nat_ed_ses_key_t key0;
@@ -528,12 +535,13 @@ get_icmp_o2i_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
     {
       key0.proto = IP_PROTOCOL_ICMP;
       key0.l_addr = ip0->dst_address;
       key0.r_addr = ip0->src_address;
-      key0.l_port = echo0->identifier;
+      key0.l_port = vnet_buffer (b)->ip.reass.l4_src_port;     // TODO should this be src or dst?
       key0.r_port = 0;
     }
   else
diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c
deleted file mode 100755 (executable)
index b518c0c..0000000
+++ /dev/null
@@ -1,893 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief NAT plugin virtual fragmentation reassembly
- */
-
-#include <vnet/vnet.h>
-#include <nat/nat_reass.h>
-#include <nat/nat_ipfix_logging.h>
-
-nat_reass_main_t nat_reass_main;
-
-static u32
-nat_reass_get_nbuckets (u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  u32 nbuckets;
-  u8 i;
-
-  if (is_ip6)
-    nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR);
-  else
-    nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR);
-
-  for (i = 0; i < 31; i++)
-    if ((1 << i) >= nbuckets)
-      break;
-  nbuckets = 1 << i;
-
-  return nbuckets;
-}
-
-static_always_inline void
-nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  u32 elt_index;
-  dlist_elt_t *elt;
-
-  while ((elt_index =
-         clib_dlist_remove_head (srm->ip4_frags_list_pool,
-                                 reass->frags_per_reass_list_head_index)) !=
-        ~0)
-    {
-      elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index);
-      vec_add1 (*bi, elt->value);
-      reass->frag_n--;
-      pool_put_index (srm->ip4_frags_list_pool, elt_index);
-    }
-}
-
-static_always_inline void
-nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  u32 elt_index;
-  dlist_elt_t *elt;
-
-  while ((elt_index =
-         clib_dlist_remove_head (srm->ip6_frags_list_pool,
-                                 reass->frags_per_reass_list_head_index)) !=
-        ~0)
-    {
-      elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index);
-      vec_add1 (*bi, elt->value);
-      reass->frag_n--;
-      pool_put_index (srm->ip6_frags_list_pool, elt_index);
-    }
-}
-
-int
-nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
-              u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  u32 nbuckets;
-
-  if (is_ip6)
-    {
-      if (srm->ip6_max_reass != max_reass)
-       {
-         clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
-
-         srm->ip6_max_reass = max_reass;
-         pool_free (srm->ip6_reass_pool);
-         pool_alloc (srm->ip6_reass_pool, srm->ip4_max_reass);
-         nbuckets = nat_reass_get_nbuckets (0);
-         clib_bihash_free_48_8 (&srm->ip6_reass_hash);
-         clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass",
-                                nbuckets, nbuckets * 1024);
-
-         clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
-       }
-      srm->ip6_timeout = timeout;
-      srm->ip6_max_frag = max_frag;
-      srm->ip6_drop_frag = drop_frag;
-    }
-  else
-    {
-      if (srm->ip4_max_reass != max_reass)
-       {
-         clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-
-         srm->ip4_max_reass = max_reass;
-         pool_free (srm->ip4_reass_pool);
-         pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
-         nbuckets = nat_reass_get_nbuckets (0);
-         clib_bihash_free_16_8 (&srm->ip4_reass_hash);
-         clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass",
-                                nbuckets, nbuckets * 1024);
-         clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-       }
-      srm->ip4_timeout = timeout;
-      srm->ip4_max_frag = max_frag;
-      srm->ip4_drop_frag = drop_frag;
-    }
-
-  return 0;
-}
-
-u32
-nat_reass_get_timeout (u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  if (is_ip6)
-    return srm->ip6_timeout;
-
-  return srm->ip4_timeout;
-}
-
-u16
-nat_reass_get_max_reass (u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  if (is_ip6)
-    return srm->ip6_max_reass;
-
-  return srm->ip4_max_reass;
-}
-
-u8
-nat_reass_get_max_frag (u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  if (is_ip6)
-    return srm->ip6_max_frag;
-
-  return srm->ip4_max_frag;
-}
-
-u8
-nat_reass_is_drop_frag (u8 is_ip6)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  if (is_ip6)
-    return srm->ip6_drop_frag;
-
-  return srm->ip4_drop_frag;
-}
-
-static_always_inline nat_reass_ip4_t *
-nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  clib_bihash_kv_16_8_t kv, value;
-  nat_reass_ip4_t *reass;
-
-  kv.key[0] = k->as_u64[0];
-  kv.key[1] = k->as_u64[1];
-
-  if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
-    return 0;
-
-  reass = pool_elt_at_index (srm->ip4_reass_pool, value.value);
-  if (now < reass->last_heard + (f64) srm->ip4_timeout)
-    return reass;
-
-  return 0;
-}
-
-nat_reass_ip4_t *
-nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id,
-                   u8 proto)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  nat_reass_ip4_t *reass = 0;
-  nat_reass_ip4_key_t k;
-  f64 now = vlib_time_now (srm->vlib_main);
-
-  k.src.as_u32 = src.as_u32;
-  k.dst.as_u32 = dst.as_u32;
-  k.frag_id = frag_id;
-  k.proto = proto;
-
-  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-  reass = nat_ip4_reass_lookup (&k, now);
-  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-
-  return reass;
-}
-
-nat_reass_ip4_t *
-nat_ip4_reass_create (ip4_address_t src, ip4_address_t dst, u16 frag_id,
-                     u8 proto)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  nat_reass_ip4_t *reass = 0;
-  dlist_elt_t *elt, *per_reass_list_head_elt;
-  u32 elt_index;
-  f64 now = vlib_time_now (srm->vlib_main);
-  nat_reass_ip4_key_t k;
-  clib_bihash_kv_16_8_t kv;
-
-  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-
-  if (srm->ip4_reass_n >= srm->ip4_max_reass)
-    {
-      nat_elog_warn ("no free resassembly slot");
-      goto unlock;
-    }
-
-  pool_get (srm->ip4_reass_pool, reass);
-  pool_get (srm->ip4_reass_lru_list_pool, elt);
-  reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
-  clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
-  elt->value = reass - srm->ip4_reass_pool;
-  clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
-                     srm->ip4_reass_head_index, elt_index);
-  pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
-  reass->frags_per_reass_list_head_index =
-    per_reass_list_head_elt - srm->ip4_frags_list_pool;
-  clib_dlist_init (srm->ip4_frags_list_pool,
-                  reass->frags_per_reass_list_head_index);
-  srm->ip4_reass_n++;
-  k.src.as_u32 = src.as_u32;
-  k.dst.as_u32 = dst.as_u32;
-  k.frag_id = frag_id;
-  k.proto = proto;
-  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
-  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
-  kv.value = reass - srm->ip4_reass_pool;
-  reass->sess_index = (u32) ~ 0;
-  reass->thread_index = (u32) ~ 0;
-  reass->last_heard = now;
-  reass->frag_n = 0;
-  reass->flags = 0;
-  reass->classify_next = NAT_REASS_IP4_CLASSIFY_NONE;
-  if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
-    nat_elog_warn ("ip4_reass_hash add key failed");
-
-unlock:
-  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-  return reass;
-}
-
-nat_reass_ip4_t *
-nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst,
-                             u16 frag_id, u8 proto, u8 reset_timeout,
-                             u32 ** bi_to_drop)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  nat_reass_ip4_t *reass = 0;
-  nat_reass_ip4_key_t k;
-  f64 now = vlib_time_now (srm->vlib_main);
-  dlist_elt_t *oldest_elt, *elt;
-  dlist_elt_t *per_reass_list_head_elt;
-  u32 oldest_index, elt_index;
-  clib_bihash_kv_16_8_t kv, value;
-
-  k.src.as_u32 = src.as_u32;
-  k.dst.as_u32 = dst.as_u32;
-  k.frag_id = frag_id;
-  k.proto = proto;
-
-  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-
-  reass = nat_ip4_reass_lookup (&k, now);
-  if (reass)
-    {
-      if (reset_timeout)
-       {
-         reass->last_heard = now;
-         clib_dlist_remove (srm->ip4_reass_lru_list_pool,
-                            reass->lru_list_index);
-         clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
-                             srm->ip4_reass_head_index,
-                             reass->lru_list_index);
-       }
-
-      if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP)
-       {
-         reass = 0;
-         goto unlock;
-       }
-
-      goto unlock;
-    }
-
-  if (srm->ip4_reass_n >= srm->ip4_max_reass)
-    {
-      oldest_index =
-       clib_dlist_remove_head (srm->ip4_reass_lru_list_pool,
-                               srm->ip4_reass_head_index);
-      ASSERT (oldest_index != ~0);
-      oldest_elt =
-       pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
-      reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value);
-      if (now < reass->last_heard + (f64) srm->ip4_timeout)
-       {
-         clib_dlist_addhead (srm->ip4_reass_lru_list_pool,
-                             srm->ip4_reass_head_index, oldest_index);
-         nat_elog_warn ("no free resassembly slot");
-         reass = 0;
-         goto unlock;
-       }
-
-      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
-                         srm->ip4_reass_head_index, oldest_index);
-
-      kv.key[0] = reass->key.as_u64[0];
-      kv.key[1] = reass->key.as_u64[1];
-      if (!clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value))
-       {
-         if (value.value == (reass - srm->ip4_reass_pool))
-           {
-             if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0))
-               {
-                 reass = 0;
-                 goto unlock;
-               }
-           }
-       }
-
-      nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
-    }
-  else
-    {
-      pool_get (srm->ip4_reass_pool, reass);
-      pool_get (srm->ip4_reass_lru_list_pool, elt);
-      reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool;
-      clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index);
-      elt->value = reass - srm->ip4_reass_pool;
-      clib_dlist_addtail (srm->ip4_reass_lru_list_pool,
-                         srm->ip4_reass_head_index, elt_index);
-      pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt);
-      reass->frags_per_reass_list_head_index =
-       per_reass_list_head_elt - srm->ip4_frags_list_pool;
-      clib_dlist_init (srm->ip4_frags_list_pool,
-                      reass->frags_per_reass_list_head_index);
-      srm->ip4_reass_n++;
-    }
-
-  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
-  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
-  kv.value = reass - srm->ip4_reass_pool;
-  reass->sess_index = (u32) ~ 0;
-  reass->thread_index = (u32) ~ 0;
-  reass->last_heard = now;
-  reass->frag_n = 0;
-  reass->flags = 0;
-  reass->classify_next = NAT_REASS_IP4_CLASSIFY_NONE;
-
-  if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1))
-    {
-      reass = 0;
-      goto unlock;
-    }
-
-unlock:
-  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-  return reass;
-}
-
-int
-nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass,
-                           u32 bi, u32 ** bi_to_drop)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  dlist_elt_t *elt;
-  u32 elt_index;
-
-  if (reass->frag_n >= srm->ip4_max_frag)
-    {
-      nat_ipfix_logging_max_fragments_ip4 (thread_index, srm->ip4_max_frag,
-                                          &reass->key.src);
-      reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP;
-      nat_ip4_reass_get_frags_inline (reass, bi_to_drop);
-      return -1;
-    }
-
-  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-
-  pool_get (srm->ip4_frags_list_pool, elt);
-  elt_index = elt - srm->ip4_frags_list_pool;
-  clib_dlist_init (srm->ip4_frags_list_pool, elt_index);
-  elt->value = bi;
-  clib_dlist_addtail (srm->ip4_frags_list_pool,
-                     reass->frags_per_reass_list_head_index, elt_index);
-  reass->frag_n++;
-
-  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-
-  return 0;
-}
-
-void
-nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  clib_spinlock_lock_if_init (&srm->ip4_reass_lock);
-
-  nat_ip4_reass_get_frags_inline (reass, bi);
-
-  clib_spinlock_unlock_if_init (&srm->ip4_reass_lock);
-}
-
-void
-nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx)
-{
-  nat_reass_ip4_t *reass;
-  nat_reass_main_t *srm = &nat_reass_main;
-  f64 now = vlib_time_now (srm->vlib_main);
-
-  /* *INDENT-OFF* */
-  pool_foreach (reass, srm->ip4_reass_pool,
-  ({
-    if (now < reass->last_heard + (f64) srm->ip4_timeout)
-      {
-        if (fn (reass, ctx))
-          return;
-      }
-  }));
-  /* *INDENT-ON* */
-}
-
-static_always_inline nat_reass_ip6_t *
-nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  clib_bihash_kv_48_8_t kv, value;
-  nat_reass_ip6_t *reass;
-
-  k->unused = 0;
-  kv.key[0] = k->as_u64[0];
-  kv.key[1] = k->as_u64[1];
-  kv.key[2] = k->as_u64[2];
-  kv.key[3] = k->as_u64[3];
-  kv.key[4] = k->as_u64[4];
-  kv.key[5] = k->as_u64[5];
-
-  if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value))
-    return 0;
-
-  reass = pool_elt_at_index (srm->ip6_reass_pool, value.value);
-  if (now < reass->last_heard + (f64) srm->ip6_timeout)
-    return reass;
-
-  return 0;
-}
-
-nat_reass_ip6_t *
-nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst,
-                             u32 frag_id, u8 proto, u8 reset_timeout,
-                             u32 ** bi_to_drop)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  nat_reass_ip6_t *reass = 0;
-  nat_reass_ip6_key_t k;
-  f64 now = vlib_time_now (srm->vlib_main);
-  dlist_elt_t *oldest_elt, *elt;
-  dlist_elt_t *per_reass_list_head_elt;
-  u32 oldest_index, elt_index;
-  clib_bihash_kv_48_8_t kv;
-
-  k.src.as_u64[0] = src.as_u64[0];
-  k.src.as_u64[1] = src.as_u64[1];
-  k.dst.as_u64[0] = dst.as_u64[0];
-  k.dst.as_u64[1] = dst.as_u64[1];
-  k.frag_id = frag_id;
-  k.proto = proto;
-  k.unused = 0;
-
-  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
-
-  reass = nat_ip6_reass_lookup (&k, now);
-  if (reass)
-    {
-      if (reset_timeout)
-       {
-         reass->last_heard = now;
-         clib_dlist_remove (srm->ip6_reass_lru_list_pool,
-                            reass->lru_list_index);
-         clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
-                             srm->ip6_reass_head_index,
-                             reass->lru_list_index);
-       }
-
-      if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP)
-       {
-         reass = 0;
-         goto unlock;
-       }
-
-      goto unlock;
-    }
-
-  if (srm->ip6_reass_n >= srm->ip6_max_reass)
-    {
-      oldest_index =
-       clib_dlist_remove_head (srm->ip6_reass_lru_list_pool,
-                               srm->ip6_reass_head_index);
-      ASSERT (oldest_index != ~0);
-      oldest_elt =
-       pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index);
-      reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value);
-      if (now < reass->last_heard + (f64) srm->ip6_timeout)
-       {
-         clib_dlist_addhead (srm->ip6_reass_lru_list_pool,
-                             srm->ip6_reass_head_index, oldest_index);
-         nat_elog_warn ("no free resassembly slot");
-         reass = 0;
-         goto unlock;
-       }
-
-      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
-                         srm->ip6_reass_head_index, oldest_index);
-
-      kv.key[0] = k.as_u64[0];
-      kv.key[1] = k.as_u64[1];
-      kv.key[2] = k.as_u64[2];
-      kv.key[3] = k.as_u64[3];
-      kv.key[4] = k.as_u64[4];
-      kv.key[5] = k.as_u64[5];
-      if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0))
-       {
-         reass = 0;
-         goto unlock;
-       }
-
-      nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
-    }
-  else
-    {
-      pool_get (srm->ip6_reass_pool, reass);
-      pool_get (srm->ip6_reass_lru_list_pool, elt);
-      reass->lru_list_index = elt_index = elt - srm->ip6_reass_lru_list_pool;
-      clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index);
-      elt->value = reass - srm->ip6_reass_pool;
-      clib_dlist_addtail (srm->ip6_reass_lru_list_pool,
-                         srm->ip6_reass_head_index, elt_index);
-      pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt);
-      reass->frags_per_reass_list_head_index =
-       per_reass_list_head_elt - srm->ip6_frags_list_pool;
-      clib_dlist_init (srm->ip6_frags_list_pool,
-                      reass->frags_per_reass_list_head_index);
-      srm->ip6_reass_n++;
-    }
-
-  reass->key.as_u64[0] = kv.key[0] = k.as_u64[0];
-  reass->key.as_u64[1] = kv.key[1] = k.as_u64[1];
-  reass->key.as_u64[2] = kv.key[2] = k.as_u64[2];
-  reass->key.as_u64[3] = kv.key[3] = k.as_u64[3];
-  reass->key.as_u64[4] = kv.key[4] = k.as_u64[4];
-  reass->key.as_u64[5] = kv.key[5] = k.as_u64[5];
-  kv.value = reass - srm->ip6_reass_pool;
-  reass->sess_index = (u32) ~ 0;
-  reass->last_heard = now;
-
-  if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1))
-    {
-      reass = 0;
-      goto unlock;
-    }
-
-unlock:
-  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
-  return reass;
-}
-
-int
-nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass,
-                           u32 bi, u32 ** bi_to_drop)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  dlist_elt_t *elt;
-  u32 elt_index;
-
-  if (reass->frag_n >= srm->ip6_max_frag)
-    {
-      nat_ipfix_logging_max_fragments_ip6 (thread_index, srm->ip6_max_frag,
-                                          &reass->key.src);
-      reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP;
-      nat_ip6_reass_get_frags_inline (reass, bi_to_drop);
-      return -1;
-    }
-
-  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
-
-  pool_get (srm->ip6_frags_list_pool, elt);
-  elt_index = elt - srm->ip6_frags_list_pool;
-  clib_dlist_init (srm->ip6_frags_list_pool, elt_index);
-  elt->value = bi;
-  clib_dlist_addtail (srm->ip6_frags_list_pool,
-                     reass->frags_per_reass_list_head_index, elt_index);
-  reass->frag_n++;
-
-  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
-
-  return 0;
-}
-
-void
-nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-
-  clib_spinlock_lock_if_init (&srm->ip6_reass_lock);
-
-  nat_ip6_reass_get_frags_inline (reass, bi);
-
-  clib_spinlock_unlock_if_init (&srm->ip6_reass_lock);
-}
-
-void
-nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx)
-{
-  nat_reass_ip6_t *reass;
-  nat_reass_main_t *srm = &nat_reass_main;
-  f64 now = vlib_time_now (srm->vlib_main);
-
-  /* *INDENT-OFF* */
-  pool_foreach (reass, srm->ip6_reass_pool,
-  ({
-    if (now < reass->last_heard + (f64) srm->ip4_timeout)
-      {
-        if (fn (reass, ctx))
-          return;
-      }
-  }));
-  /* *INDENT-ON* */
-}
-
-clib_error_t *
-nat_reass_init (vlib_main_t * vm)
-{
-  nat_reass_main_t *srm = &nat_reass_main;
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
-  clib_error_t *error = 0;
-  dlist_elt_t *head;
-  u32 nbuckets, head_index;
-
-  srm->vlib_main = vm;
-  srm->vnet_main = vnet_get_main ();
-
-  /* IPv4 */
-  srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT;
-  srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT;
-  srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT;
-  srm->ip4_drop_frag = 0;
-  srm->ip4_reass_n = 0;
-
-  if (tm->n_vlib_mains > 1)
-    clib_spinlock_init (&srm->ip4_reass_lock);
-
-  pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass);
-
-  nbuckets = nat_reass_get_nbuckets (0);
-  clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets,
-                        nbuckets * 1024);
-
-  pool_get (srm->ip4_reass_lru_list_pool, head);
-  srm->ip4_reass_head_index = head_index =
-    head - srm->ip4_reass_lru_list_pool;
-  clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index);
-
-  /* IPv6 */
-  srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT;
-  srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT;
-  srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT;
-  srm->ip6_drop_frag = 0;
-  srm->ip6_reass_n = 0;
-
-  if (tm->n_vlib_mains > 1)
-    clib_spinlock_init (&srm->ip6_reass_lock);
-
-  pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass);
-
-  nbuckets = nat_reass_get_nbuckets (1);
-  clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets,
-                        nbuckets * 1024);
-
-  pool_get (srm->ip6_reass_lru_list_pool, head);
-  srm->ip6_reass_head_index = head_index =
-    head - srm->ip6_reass_lru_list_pool;
-  clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index);
-
-  return error;
-}
-
-static clib_error_t *
-nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
-                     vlib_cli_command_t * cmd)
-{
-  clib_error_t *error = 0;
-  unformat_input_t _line_input, *line_input = &_line_input;
-  u32 timeout = 0, max_reass = 0, max_frag = 0;
-  u8 drop_frag = (u8) ~ 0, is_ip6 = 0;
-  int rv;
-
-  /* Get a line of input. */
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat (line_input, "max-reassemblies %u", &max_reass))
-       ;
-      else if (unformat (line_input, "max-fragments %u", &max_frag))
-       ;
-      else if (unformat (line_input, "timeout %u", &timeout))
-       ;
-      else if (unformat (line_input, "enable"))
-       drop_frag = 0;
-      else if (unformat (line_input, "disable"))
-       drop_frag = 1;
-      else if (unformat (line_input, "ip4"))
-       is_ip6 = 0;
-      else if (unformat (line_input, "ip6"))
-       is_ip6 = 1;
-      else
-       {
-         error = clib_error_return (0, "unknown input '%U'",
-                                    format_unformat_error, line_input);
-         goto done;
-       }
-    }
-
-  if (!timeout)
-    timeout = nat_reass_get_timeout (is_ip6);
-  if (!max_reass)
-    max_reass = nat_reass_get_max_reass (is_ip6);
-  if (!max_frag)
-    max_frag = nat_reass_get_max_frag (is_ip6);
-  if (drop_frag == (u8) ~ 0)
-    drop_frag = nat_reass_is_drop_frag (is_ip6);
-
-  rv =
-    nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag,
-                  is_ip6);
-  if (rv)
-    {
-      error = clib_error_return (0, "nat_set_reass return %d", rv);
-      goto done;
-    }
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-static int
-nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx)
-{
-  vlib_main_t *vm = ctx;
-  u8 *flags_str = 0;
-  const char *classify_next_str;
-
-  if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP)
-    flags_str = format (flags_str, "MAX_FRAG_DROP");
-  if (reass->flags & NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE)
-    {
-      if (flags_str)
-       flags_str = format (flags_str, " | ");
-      flags_str = format (flags_str, "CLASSIFY_ED_CONTINUE");
-    }
-  if (reass->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
-    {
-      if (flags_str)
-       flags_str = format (flags_str, " | ");
-      flags_str = format (flags_str, "CLASSIFY_ED_DONT_TRANSLATE");
-    }
-  if (!flags_str)
-    flags_str = format (flags_str, "0");
-  flags_str = format (flags_str, "%c", 0);
-
-  switch (reass->classify_next)
-    {
-    case NAT_REASS_IP4_CLASSIFY_NONE:
-      classify_next_str = "NONE";
-      break;
-    case NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT:
-      classify_next_str = "IN2OUT";
-      break;
-    case NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN:
-      classify_next_str = "OUT2IN";
-      break;
-    default:
-      classify_next_str = "invalid value";
-    }
-
-  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%04x cached %u "
-                  "flags %s classify_next %s",
-                  format_ip4_address, &reass->key.src,
-                  format_ip4_address, &reass->key.dst,
-                  reass->key.proto,
-                  clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n,
-                  flags_str, classify_next_str);
-
-  vec_free (flags_str);
-
-  return 0;
-}
-
-static int
-nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx)
-{
-  vlib_main_t *vm = ctx;
-
-  vlib_cli_output (vm, "  src %U dst %U proto %u id 0x%08x cached %u",
-                  format_ip6_address, &reass->key.src,
-                  format_ip6_address, &reass->key.dst,
-                  reass->key.proto,
-                  clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n);
-
-  return 0;
-}
-
-static clib_error_t *
-show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
-                          vlib_cli_command_t * cmd)
-{
-  vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s",
-                  nat_reass_is_drop_frag (0) ? "DISABLED" : "ENABLED");
-  vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0));
-  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0));
-  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0));
-  vlib_cli_output (vm, " reassemblies:");
-  nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm);
-
-  vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s",
-                  nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED");
-  vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1));
-  vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1));
-  vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1));
-  vlib_cli_output (vm, " reassemblies:");
-  nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm);
-
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (nat_reass_command, static) =
-{
-  .path = "nat virtual-reassembly",
-  .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies <n>] "
-                "[max-fragments <n>] [timeout <sec>] [enable|disable]",
-  .function = nat_reass_command_fn,
-};
-
-VLIB_CLI_COMMAND (show_nat_reass_command, static) =
-{
-  .path = "show nat virtual-reassembly",
-  .short_help = "show nat virtual-reassembly",
-  .function = show_nat_reass_command_fn,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h
deleted file mode 100644 (file)
index 11f9db5..0000000
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief NAT plugin virtual fragmentation reassembly
- */
-#ifndef __included_nat_reass_h__
-#define __included_nat_reass_h__
-
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_48_8.h>
-#include <vppinfra/dlist.h>
-
-#define NAT_REASS_TIMEOUT_DEFAULT 2
-#define NAT_MAX_REASS_DEAFULT 1024
-#define NAT_MAX_FRAG_DEFAULT 5
-#define NAT_REASS_HT_LOAD_FACTOR (0.75)
-
-#define NAT_REASS_FLAG_MAX_FRAG_DROP 1
-#define NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE 2
-#define NAT_REASS_FLAG_ED_DONT_TRANSLATE 4
-
-typedef struct
-{
-  union
-  {
-    struct
-    {
-      ip4_address_t src;
-      ip4_address_t dst;
-      /* align by making this 4 octets even though its a 2 octets field */
-      u32 frag_id;
-      /* align by making this 4 octets even though its a 1 octet field */
-      u32 proto;
-    };
-    u64 as_u64[2];
-  };
-} nat_reass_ip4_key_t;
-
-enum
-{
-  NAT_REASS_IP4_CLASSIFY_NONE,
-  NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT,
-  NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN
-};
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct
-{
-  nat_reass_ip4_key_t key;
-  u32 lru_list_index;
-  u32 sess_index;
-  u32 thread_index;
-  f64 last_heard;
-  u32 frags_per_reass_list_head_index;
-  u8 frag_n;
-  u8 flags;
-  u8 classify_next;
-}) nat_reass_ip4_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
-  union
-  {
-    struct
-    {
-      ip6_address_t src;
-      ip6_address_t dst;
-      u32 frag_id;
-      /* align by making this 4 octets even though its a 1 octet field */
-      u32 proto;
-      u64 unused;
-    };
-    u64 as_u64[6];
-  };
-} nat_reass_ip6_key_t;
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct
-{
-  nat_reass_ip6_key_t key;
-  u32 lru_list_index;
-  u32 sess_index;
-  f64 last_heard;
-  u32 frags_per_reass_list_head_index;
-  u8 frag_n;
-  u8 flags;
-}) nat_reass_ip6_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
-  /* IPv4 config */
-  u32 ip4_timeout;
-  u16 ip4_max_reass;
-  u8 ip4_max_frag;
-  u8 ip4_drop_frag;
-
-  /* IPv6 config */
-  u32 ip6_timeout;
-  u16 ip6_max_reass;
-  u8 ip6_max_frag;
-  u8 ip6_drop_frag;
-
-  /* IPv4 runtime */
-  nat_reass_ip4_t *ip4_reass_pool;
-  clib_bihash_16_8_t ip4_reass_hash;
-  dlist_elt_t *ip4_reass_lru_list_pool;
-  dlist_elt_t *ip4_frags_list_pool;
-  u32 ip4_reass_head_index;
-  u16 ip4_reass_n;
-  clib_spinlock_t ip4_reass_lock;
-
-  /* IPv6 runtime */
-  nat_reass_ip6_t *ip6_reass_pool;
-  clib_bihash_48_8_t ip6_reass_hash;
-  dlist_elt_t *ip6_reass_lru_list_pool;
-  dlist_elt_t *ip6_frags_list_pool;
-  u32 ip6_reass_head_index;
-  u16 ip6_reass_n;
-  clib_spinlock_t ip6_reass_lock;
-
-  /* convenience */
-  vlib_main_t *vlib_main;
-  vnet_main_t *vnet_main;
-} nat_reass_main_t;
-
-/**
- * @brief Set NAT virtual fragmentation reassembly configuration.
- *
- * @param timeout   Reassembly timeout.
- * @param max_reass Maximum number of concurrent reassemblies.
- * @param max_frag  Maximum number of fragmets per reassembly
- * @param drop_frag If zero translate fragments, otherwise drop fragments.
- * @param is_ip6    1 if IPv6, 0 if IPv4.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-int nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag,
-                  u8 is_ip6);
-
-/**
- * @brief Get reassembly timeout.
- *
- * @param is_ip6 1 if IPv6, 0 if IPv4.
- *
- * @returns reassembly timeout.
- */
-u32 nat_reass_get_timeout (u8 is_ip6);
-
-/**
- * @brief Get maximum number of concurrent reassemblies.
- *
- * @param is_ip6 1 if IPv6, 0 if IPv4.
- *
- * @returns maximum number of concurrent reassemblies.
- */
-u16 nat_reass_get_max_reass (u8 is_ip6);
-
-/**
- * @brief Get maximum number of fragmets per reassembly.
- *
- * @param is_ip6 1 if IPv6, 0 if IPv4.
- *
- * @returns maximum number of fragmets per reassembly.
- */
-u8 nat_reass_get_max_frag (u8 is_ip6);
-
-/**
- * @brief Get status of virtual fragmentation reassembly.
- *
- * @param is_ip6 1 if IPv6, 0 if IPv4.
- *
- * @returns zero if translate fragments, non-zero value if drop fragments.
- */
-u8 nat_reass_is_drop_frag (u8 is_ip6);
-
-/**
- * @brief Initialize NAT virtual fragmentation reassembly.
- *
- * @param vm vlib main.
- *
- * @return error code.
- */
-clib_error_t *nat_reass_init (vlib_main_t * vm);
-
-/**
- * @brief Find reassembly.
- *
- * @param src Source IPv4 address.
- * @param dst Destination IPv4 address.
- * @param frag_id Fragment ID.
- * @param proto L4 protocol.
- *
- * @returns Reassembly data or 0 if not found.
- */
-nat_reass_ip4_t *nat_ip4_reass_find (ip4_address_t src,
-                                    ip4_address_t dst,
-                                    u16 frag_id, u8 proto);
-
-/**
- * @brief Create reassembly.
- *
- * @param src Source IPv4 address.
- * @param dst Destination IPv4 address.
- * @param frag_id Fragment ID.
- * @param proto L4 protocol.
- *
- * @returns Reassembly data or 0 on failure.
- */
-nat_reass_ip4_t *nat_ip4_reass_create (ip4_address_t src, ip4_address_t dst,
-                                      u16 frag_id, u8 proto);
-
-/**
- * @brief Find or create reassembly.
- *
- * @param src Source IPv4 address.
- * @param dst Destination IPv4 address.
- * @param frag_id Fragment ID.
- * @param proto L4 protocol.
- * @param reset_timeout If non-zero value reset timeout.
- * @param bi_to_drop Fragments to drop.
- *
- * @returns Reassembly data or 0 on failure.
- */
-nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src,
-                                              ip4_address_t dst,
-                                              u16 frag_id, u8 proto,
-                                              u8 reset_timeout,
-                                              u32 ** bi_to_drop);
-
-/**
- * @brief Cache fragment.
- *
- * @param reass Reassembly data.
- * @param bi Buffer index.
- * @param bi_to_drop Fragments to drop.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-int nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass,
-                               u32 bi, u32 ** bi_to_drop);
-
-/**
- * @brief Get cached fragments.
- *
- * @param reass Reassembly data.
- * @param bi Vector of buffer indexes.
- */
-void nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi);
-
-/**
- * @breif Call back function when walking IPv4 reassemblies, non-zero return
- * value stop walk.
- */
-typedef int (*nat_ip4_reass_walk_fn_t) (nat_reass_ip4_t * reass, void *ctx);
-
-/**
- * @brief Walk IPv4 reassemblies.
- *
- * @param fn The function to invoke on each entry visited.
- * @param ctx A context passed in the visit function.
- */
-void nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx);
-
-/**
- * @brief Find or create reassembly.
- *
- * @param src Source IPv6 address.
- * @param dst Destination IPv6 address.
- * @param frag_id Fragment ID.
- * @param proto L4 protocol.
- * @param reset_timeout If non-zero value reset timeout.
- * @param bi_to_drop Fragments to drop.
- *
- * @returns Reassembly data or 0 on failure.
- */
-nat_reass_ip6_t *nat_ip6_reass_find_or_create (ip6_address_t src,
-                                              ip6_address_t dst,
-                                              u32 frag_id, u8 proto,
-                                              u8 reset_timeout,
-                                              u32 ** bi_to_drop);
-/**
- * @brief Cache fragment.
- *
- * @param reass Reassembly data.
- * @param bi Buffer index.
- * @param bi_to_drop Fragments to drop.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-int nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass,
-                               u32 bi, u32 ** bi_to_drop);
-
-/**
- * @brief Get cached fragments.
- *
- * @param reass Reassembly data.
- * @param bi Vector of buffer indexes.
- */
-void nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi);
-
-/**
- * @breif Call back function when walking IPv6 reassemblies, non-zero return
- * value stop walk.
- */
-typedef int (*nat_ip6_reass_walk_fn_t) (nat_reass_ip6_t * reass, void *ctx);
-
-/**
- * @brief Walk IPv6 reassemblies.
- *
- * @param fn The function to invoke on each entry visited.
- * @param ctx A context passed in the visit function.
- */
-void nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx);
-
-#endif /* __included_nat_reass_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
index 6ee1266..e9ca88f 100755 (executable)
@@ -27,7 +27,6 @@
 #include <vnet/fib/ip4_fib.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat44_inlines.h>
 #include <nat/nat_syslog.h>
@@ -108,7 +107,6 @@ typedef enum
   SNAT_OUT2IN_NEXT_DROP,
   SNAT_OUT2IN_NEXT_LOOKUP,
   SNAT_OUT2IN_NEXT_ICMP_ERROR,
-  SNAT_OUT2IN_NEXT_REASS,
   SNAT_OUT2IN_N_NEXT,
 } snat_out2in_next_t;
 
@@ -267,7 +265,7 @@ create_session_for_static_mapping (snat_main_t * sm,
 
 #ifndef CLIB_MARCH_VARIANT
 static_always_inline
-  snat_out2in_error_t icmp_get_key (ip4_header_t * ip0,
+  snat_out2in_error_t icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0,
                                    snat_session_key_t * p_key0)
 {
   icmp46_header_t *icmp0;
@@ -280,11 +278,12 @@ static_always_inline
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
 
-  if (!icmp_is_error_message (icmp0))
+  if (!icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
     {
       key0.protocol = SNAT_PROTOCOL_ICMP;
       key0.addr = ip0->dst_address;
-      key0.port = echo0->identifier;
+      key0.port = vnet_buffer (b)->ip.reass.l4_src_port;       // TODO should this be dst port?
     }
   else
     {
@@ -332,7 +331,6 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node,
                        snat_session_key_t * p_value,
                        u8 * p_dont_translate, void *d, void *e)
 {
-  icmp46_header_t *icmp0;
   u32 sw_if_index0;
   u32 rx_fib_index0;
   snat_session_key_t key0;
@@ -345,13 +343,12 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node,
   int err;
   u8 identity_nat;
 
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
   key0.protocol = 0;
 
-  err = icmp_get_key (ip0, &key0);
+  err = icmp_get_key (b0, ip0, &key0);
   if (err != -1)
     {
       b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
@@ -390,9 +387,11 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node,
            }
        }
 
-      if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply &&
-                        (icmp0->type != ICMP4_echo_request
-                         || !is_addr_only)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+              ICMP4_echo_request || !is_addr_only)))
        {
          b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
          next0 = SNAT_OUT2IN_NEXT_DROP;
@@ -417,9 +416,13 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node,
     }
   else
     {
-      if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply &&
-                        icmp0->type != ICMP4_echo_request &&
-                        !icmp_is_error_message (icmp0)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_request
+          && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                          reass.icmp_type_or_tcp_flags)))
        {
          b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
          next0 = SNAT_OUT2IN_NEXT_DROP;
@@ -462,7 +465,6 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node,
                        snat_session_key_t * p_value,
                        u8 * p_dont_translate, void *d, void *e)
 {
-  icmp46_header_t *icmp0;
   u32 sw_if_index0;
   u32 rx_fib_index0;
   snat_session_key_t key0;
@@ -472,11 +474,10 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node,
   u32 next0 = ~0;
   int err;
 
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-  err = icmp_get_key (ip0, &key0);
+  err = icmp_get_key (b0, ip0, &key0);
   if (err != -1)
     {
       b0->error = node->errors[err];
@@ -499,9 +500,12 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node,
       goto out;
     }
 
-  if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply &&
-                    (icmp0->type != ICMP4_echo_request || !is_addr_only) &&
-                    !icmp_is_error_message (icmp0)))
+  if (PREDICT_FALSE
+      (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply
+       && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_request || !is_addr_only)
+       && !icmp_type_is_error_message (vnet_buffer (b0)->ip.
+                                      reass.icmp_type_or_tcp_flags)))
     {
       b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
       next0 = SNAT_OUT2IN_NEXT_DROP;
@@ -575,72 +579,78 @@ icmp_out2in (snat_main_t * sm,
                         dst_address /* changed member */ );
   ip0->checksum = ip_csum_fold (sum0);
 
-  if (icmp0->checksum == 0)
-    icmp0->checksum = 0xffff;
 
-  if (!icmp_is_error_message (icmp0))
+  if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
     {
-      new_id0 = sm0.port;
-      if (PREDICT_FALSE (new_id0 != echo0->identifier))
+      if (icmp0->checksum == 0)
+       icmp0->checksum = 0xffff;
+
+      if (!icmp_type_is_error_message (icmp0->type))
        {
-         old_id0 = echo0->identifier;
          new_id0 = sm0.port;
-         echo0->identifier = new_id0;
-
-         sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
-                                identifier /* changed member */ );
-         icmp0->checksum = ip_csum_fold (sum0);
+         if (PREDICT_FALSE (new_id0 != echo0->identifier))
+           {
+             old_id0 = echo0->identifier;
+             new_id0 = sm0.port;
+             echo0->identifier = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 =
+               ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+                               identifier /* changed member */ );
+             icmp0->checksum = ip_csum_fold (sum0);
+           }
        }
-    }
-  else
-    {
-      inner_ip0 = (ip4_header_t *) (echo0 + 1);
-      l4_header = ip4_next_header (inner_ip0);
-
-      if (!ip4_header_checksum_is_valid (inner_ip0))
+      else
        {
-         next0 = SNAT_OUT2IN_NEXT_DROP;
-         goto out;
-       }
+         inner_ip0 = (ip4_header_t *) (echo0 + 1);
+         l4_header = ip4_next_header (inner_ip0);
 
-      old_addr0 = inner_ip0->src_address.as_u32;
-      inner_ip0->src_address = sm0.addr;
-      new_addr0 = inner_ip0->src_address.as_u32;
-
-      sum0 = icmp0->checksum;
-      sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                            src_address /* changed member */ );
-      icmp0->checksum = ip_csum_fold (sum0);
+         if (!ip4_header_checksum_is_valid (inner_ip0))
+           {
+             next0 = SNAT_OUT2IN_NEXT_DROP;
+             goto out;
+           }
 
-      switch (protocol)
-       {
-       case SNAT_PROTOCOL_ICMP:
-         inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
-
-         old_id0 = inner_echo0->identifier;
-         new_id0 = sm0.port;
-         inner_echo0->identifier = new_id0;
+         old_addr0 = inner_ip0->src_address.as_u32;
+         inner_ip0->src_address = sm0.addr;
+         new_addr0 = inner_ip0->src_address.as_u32;
 
          sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
-                                identifier);
+         sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                src_address /* changed member */ );
          icmp0->checksum = ip_csum_fold (sum0);
-         break;
-       case SNAT_PROTOCOL_UDP:
-       case SNAT_PROTOCOL_TCP:
-         old_id0 = ((tcp_udp_header_t *) l4_header)->src_port;
-         new_id0 = sm0.port;
-         ((tcp_udp_header_t *) l4_header)->src_port = new_id0;
 
-         sum0 = icmp0->checksum;
-         sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
-                                src_port);
-         icmp0->checksum = ip_csum_fold (sum0);
-         break;
-       default:
-         ASSERT (0);
+         switch (protocol)
+           {
+           case SNAT_PROTOCOL_ICMP:
+             inner_icmp0 = (icmp46_header_t *) l4_header;
+             inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+
+             old_id0 = inner_echo0->identifier;
+             new_id0 = sm0.port;
+             inner_echo0->identifier = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 =
+               ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+                               identifier);
+             icmp0->checksum = ip_csum_fold (sum0);
+             break;
+           case SNAT_PROTOCOL_UDP:
+           case SNAT_PROTOCOL_TCP:
+             old_id0 = ((tcp_udp_header_t *) l4_header)->src_port;
+             new_id0 = sm0.port;
+             ((tcp_udp_header_t *) l4_header)->src_port = new_id0;
+
+             sum0 = icmp0->checksum;
+             sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
+                                    src_port);
+             icmp0->checksum = ip_csum_fold (sum0);
+             break;
+           default:
+             ASSERT (0);
+           }
        }
     }
 
@@ -816,13 +826,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
              goto trace0;
            }
 
-         if (PREDICT_FALSE (ip4_is_fragment (ip0)))
-           {
-             next0 = SNAT_OUT2IN_NEXT_REASS;
-             fragments++;
-             goto trace0;
-           }
-
          if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
            {
              next0 = icmp_out2in_slow_path
@@ -833,7 +836,7 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
            }
 
          key0.addr = ip0->dst_address;
-         key0.port = udp0->dst_port;
+         key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port;
          key0.protocol = proto0;
          key0.fib_index = rx_fib_index0;
 
@@ -851,10 +854,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                   * Send DHCP packets to the ipv4 stack, or we won't
                   * be able to use dhcp client on the outside interface
                   */
-                 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                    && (udp0->dst_port ==
-                                        clib_host_to_net_u16
-                                        (UDP_DST_PORT_dhcp_to_client))))
+                 if (PREDICT_FALSE
+                     (proto0 == SNAT_PROTOCOL_UDP
+                      && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+                          clib_host_to_net_u16
+                          (UDP_DST_PORT_dhcp_to_client))))
                    {
                      vnet_feature_next (&next0, b0);
                      goto trace0;
@@ -897,34 +901,41 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                                 dst_address /* changed member */ );
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->dst_port;
-         new_port0 = udp0->dst_port = s0->in2out.port;
-
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-
-             sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             tcp0->checksum = ip_csum_fold (sum0);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp0->checksum))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = udp0->checksum;
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
+
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+               {
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 if (PREDICT_FALSE (udp0->checksum))
+                   {
+                     sum0 = udp0->checksum;
+                     sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address      /* changed member */
+                       );
+                     sum0 =
+                       ip_csum_update (sum0, old_port0, new_port0,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp0->checksum = ip_csum_fold (sum0);
+                   }
                }
              udp_packets++;
            }
@@ -989,13 +1000,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
              goto trace1;
            }
 
-         if (PREDICT_FALSE (ip4_is_fragment (ip1)))
-           {
-             next1 = SNAT_OUT2IN_NEXT_REASS;
-             fragments++;
-             goto trace1;
-           }
-
          if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
            {
              next1 = icmp_out2in_slow_path
@@ -1006,7 +1010,7 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
            }
 
          key1.addr = ip1->dst_address;
-         key1.port = udp1->dst_port;
+         key1.port = vnet_buffer (b1)->ip.reass.l4_dst_port;
          key1.protocol = proto1;
          key1.fib_index = rx_fib_index1;
 
@@ -1024,10 +1028,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                   * Send DHCP packets to the ipv4 stack, or we won't
                   * be able to use dhcp client on the outside interface
                   */
-                 if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
-                                    && (udp1->dst_port ==
-                                        clib_host_to_net_u16
-                                        (UDP_DST_PORT_dhcp_to_client))))
+                 if (PREDICT_FALSE
+                     (proto1 == SNAT_PROTOCOL_UDP
+                      && (vnet_buffer (b1)->ip.reass.l4_dst_port ==
+                          clib_host_to_net_u16
+                          (UDP_DST_PORT_dhcp_to_client))))
                    {
                      vnet_feature_next (&next1, b1);
                      goto trace1;
@@ -1070,34 +1075,45 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                                 dst_address /* changed member */ );
          ip1->checksum = ip_csum_fold (sum1);
 
-         old_port1 = udp1->dst_port;
-         new_port1 = udp1->dst_port = s1->in2out.port;
-
          if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
            {
-             sum1 = tcp1->checksum;
-             sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-
-             sum1 = ip_csum_update (sum1, old_port1, new_port1,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             tcp1->checksum = ip_csum_fold (sum1);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp1->checksum))
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
                {
-                 sum1 = udp1->checksum;
+                 old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port;
+                 new_port1 = udp1->dst_port = s1->in2out.port;
+
+                 sum1 = tcp1->checksum;
                  sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
+
                  sum1 = ip_csum_update (sum1, old_port1, new_port1,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp1->checksum = ip_csum_fold (sum1);
+                 tcp1->checksum = ip_csum_fold (sum1);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
+               {
+                 old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port;
+                 new_port1 = udp1->dst_port = s1->in2out.port;
+                 if (PREDICT_FALSE (udp1->checksum))
+                   {
+
+                     sum1 = udp1->checksum;
+                     sum1 =
+                       ip_csum_update (sum1, old_addr1, new_addr1,
+                                       ip4_header_t,
+                                       dst_address /* changed member */ );
+                     sum1 =
+                       ip_csum_update (sum1, old_port1, new_port1,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp1->checksum = ip_csum_fold (sum1);
+                   }
                }
              udp_packets++;
            }
@@ -1199,13 +1215,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
              goto trace00;
            }
 
-         if (PREDICT_FALSE (ip4_is_fragment (ip0)))
-           {
-             next0 = SNAT_OUT2IN_NEXT_REASS;
-             fragments++;
-             goto trace00;
-           }
-
          if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
            {
              next0 = icmp_out2in_slow_path
@@ -1216,7 +1225,7 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
            }
 
          key0.addr = ip0->dst_address;
-         key0.port = udp0->dst_port;
+         key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port;
          key0.protocol = proto0;
          key0.fib_index = rx_fib_index0;
 
@@ -1234,10 +1243,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                   * Send DHCP packets to the ipv4 stack, or we won't
                   * be able to use dhcp client on the outside interface
                   */
-                 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                    && (udp0->dst_port ==
-                                        clib_host_to_net_u16
-                                        (UDP_DST_PORT_dhcp_to_client))))
+                 if (PREDICT_FALSE
+                     (proto0 == SNAT_PROTOCOL_UDP
+                      && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+                          clib_host_to_net_u16
+                          (UDP_DST_PORT_dhcp_to_client))))
                    {
                      vnet_feature_next (&next0, b0);
                      goto trace00;
@@ -1280,34 +1290,42 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
                                 dst_address /* changed member */ );
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->dst_port;
-         new_port0 = udp0->dst_port = s0->in2out.port;
-
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                    ip4_header_t,
-                                    dst_address /* changed member */ );
-
-             sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                    ip4_header_t /* cheat */ ,
-                                    length /* changed member */ );
-             tcp0->checksum = ip_csum_fold (sum0);
-             tcp_packets++;
-           }
-         else
-           {
-             if (PREDICT_FALSE (udp0->checksum))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = udp0->checksum;
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+
+                 sum0 = tcp0->checksum;
                  sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
                                         ip4_header_t,
                                         dst_address /* changed member */ );
+
                  sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                         ip4_header_t /* cheat */ ,
                                         length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
+                 tcp0->checksum = ip_csum_fold (sum0);
+               }
+             tcp_packets++;
+           }
+         else
+           {
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
+               {
+                 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 if (PREDICT_FALSE (udp0->checksum))
+                   {
+                     sum0 = udp0->checksum;
+                     sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address      /* changed member */
+                       );
+                     sum0 =
+                       ip_csum_update (sum0, old_port0, new_port0,
+                                       ip4_header_t /* cheat */ ,
+                                       length /* changed member */ );
+                     udp0->checksum = ip_csum_fold (sum0);
+                   }
                }
              udp_packets++;
            }
@@ -1381,344 +1399,6 @@ VLIB_REGISTER_NODE (snat_out2in_node) = {
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
-  },
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FN (nat44_out2in_reass_node) (vlib_main_t * vm,
-                                       vlib_node_runtime_t * node,
-                                       vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, *to_next;
-  snat_out2in_next_t next_index;
-  u32 pkts_processed = 0, cached_fragments = 0;
-  snat_main_t *sm = &snat_main;
-  f64 now = vlib_time_now (vm);
-  u32 thread_index = vm->thread_index;
-  snat_main_per_thread_data_t *per_thread_data =
-    &sm->per_thread_data[thread_index];
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         u8 cached0 = 0;
-         ip4_header_t *ip0;
-         nat_reass_ip4_t *reass0;
-         udp_header_t *udp0;
-         tcp_header_t *tcp0;
-         icmp46_header_t *icmp0;
-         snat_session_key_t key0, sm0;
-         clib_bihash_kv_8_8_t kv0, value0;
-         snat_session_t *s0 = 0;
-         u16 old_port0, new_port0;
-         ip_csum_t sum0;
-         u8 identity_nat0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = SNAT_OUT2IN_NEXT_LOOKUP;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         rx_fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                sw_if_index0);
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-           {
-             next0 = SNAT_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
-         udp0 = ip4_next_header (ip0);
-         tcp0 = (tcp_header_t *) udp0;
-         icmp0 = (icmp46_header_t *) udp0;
-         proto0 = ip_proto_to_snat_proto (ip0->protocol);
-
-         reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                ip0->dst_address,
-                                                ip0->fragment_id,
-                                                ip0->protocol,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = SNAT_OUT2IN_NEXT_DROP;
-             b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS];
-             nat_elog_notice ("maximum reassemblies exceeded");
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
-               {
-                 next0 = icmp_out2in_slow_path
-                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                    next0, now, thread_index, &s0);
-
-                 if (PREDICT_TRUE (next0 != SNAT_OUT2IN_NEXT_DROP))
-                   {
-                     if (s0)
-                       reass0->sess_index = s0 - per_thread_data->sessions;
-                     else
-                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     reass0->thread_index = thread_index;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-
-                 goto trace0;
-               }
-
-             key0.addr = ip0->dst_address;
-             key0.port = udp0->dst_port;
-             key0.protocol = proto0;
-             key0.fib_index = rx_fib_index0;
-             kv0.key = key0.as_u64;
-
-             if (clib_bihash_search_8_8
-                 (&per_thread_data->out2in, &kv0, &value0))
-               {
-                 /* Try to match static mapping by external address and port,
-                    destination address and port in packet */
-                 if (snat_static_mapping_match
-                     (sm, key0, &sm0, 1, 0, 0, 0, 0, &identity_nat0))
-                   {
-                     /*
-                      * Send DHCP packets to the ipv4 stack, or we won't
-                      * be able to use dhcp client on the outside interface
-                      */
-                     if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                        && (udp0->dst_port
-                                            ==
-                                            clib_host_to_net_u16
-                                            (UDP_DST_PORT_dhcp_to_client))))
-                       {
-                         vnet_feature_next (&next0, b0);
-                         goto trace0;
-                       }
-
-                     if (!sm->forwarding_enabled)
-                       {
-                         b0->error =
-                           node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
-                         next0 = SNAT_OUT2IN_NEXT_DROP;
-                       }
-                     else
-                       {
-                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                       }
-                     goto trace0;
-                   }
-
-                 if (PREDICT_FALSE (identity_nat0))
-                   goto trace0;
-
-                 /* Create session initiated by host from external network */
-                 s0 =
-                   create_session_for_static_mapping (sm, b0, sm0, key0,
-                                                      node, thread_index,
-                                                      now);
-                 if (!s0)
-                   {
-                     b0->error =
-                       node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
-                     next0 = SNAT_OUT2IN_NEXT_DROP;
-                     goto trace0;
-                   }
-                 reass0->sess_index = s0 - per_thread_data->sessions;
-                 reass0->thread_index = thread_index;
-               }
-             else
-               {
-                 s0 = pool_elt_at_index (per_thread_data->sessions,
-                                         value0.value);
-                 reass0->sess_index = value0.value;
-               }
-             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-         else
-           {
-             if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
-               goto trace0;
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip4_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG];
-                     nat_elog_notice
-                       ("maximum fragments per reassembly exceeded");
-                     next0 = SNAT_OUT2IN_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-             s0 = pool_elt_at_index (per_thread_data->sessions,
-                                     reass0->sess_index);
-           }
-
-         old_addr0 = ip0->dst_address.as_u32;
-         ip0->dst_address = s0->in2out.addr;
-         new_addr0 = ip0->dst_address.as_u32;
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
-
-         sum0 = ip0->checksum;
-         sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                ip4_header_t,
-                                dst_address /* changed member */ );
-         ip0->checksum = ip_csum_fold (sum0);
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             old_port0 = udp0->dst_port;
-             new_port0 = udp0->dst_port = s0->in2out.port;
-
-             if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
-               {
-                 sum0 = tcp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 tcp0->checksum = ip_csum_fold (sum0);
-               }
-             else if (udp0->checksum)
-               {
-                 sum0 = udp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 udp0->checksum = ip_csum_fold (sum0);
-               }
-           }
-
-         /* Accounting */
-         nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm, b0),
-                                        thread_index);
-         /* Per-user LRU list maintenance */
-         nat44_session_update_lru (sm, s0, thread_index);
-
-       trace0:
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat44_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-             cached_fragments++;
-           }
-         else
-           {
-             pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, sm->out2in_reass_node_index,
-                              SNAT_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
-                              pkts_processed);
-  vlib_node_increment_counter (vm, sm->out2in_reass_node_index,
-                              SNAT_OUT2IN_ERROR_CACHED_FRAGMENTS,
-                              cached_fragments);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT],
-                       SNAT_OUT2IN_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_out2in_reass_node) = {
-  .name = "nat44-out2in-reass",
-  .vector_size = sizeof (u32),
-  .format_trace = format_nat44_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-
-  .n_errors = ARRAY_LEN(snat_out2in_error_strings),
-  .error_strings = snat_out2in_error_strings,
-
-  .n_next_nodes = SNAT_OUT2IN_N_NEXT,
-
-  /* edit / add dispositions here */
-  .next_nodes = {
-    [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
-    [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
-    [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 /* *INDENT-ON* */
@@ -1919,7 +1599,6 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
     [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
     [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
     [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-    [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass",
   },
 };
 /* *INDENT-ON* */
index cb1cbda..ee2f85a 100644 (file)
@@ -27,7 +27,6 @@
 #include <vppinfra/error.h>
 #include <nat/nat.h>
 #include <nat/nat_ipfix_logging.h>
-#include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat44_inlines.h>
 #include <nat/nat_syslog.h>
@@ -326,8 +325,8 @@ next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port,
 }
 
 static void
-create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
-                      u32 thread_index)
+create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
+                      u32 rx_fib_index, u32 thread_index)
 {
   nat_ed_ses_key_t key;
   clib_bihash_kv_16_8_t kv, value;
@@ -339,7 +338,7 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
 
   if (ip->protocol == IP_PROTOCOL_ICMP)
     {
-      if (get_icmp_o2i_ed_key (ip, &key))
+      if (get_icmp_o2i_ed_key (b, ip, &key))
        return;
     }
   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
@@ -414,7 +413,8 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
   if (ip->protocol == IP_PROTOCOL_TCP)
     {
       tcp_header_t *tcp = ip4_next_header (ip);
-      if (nat44_set_tcp_session_state_o2i (sm, s, tcp, thread_index))
+      if (nat44_set_tcp_session_state_o2i
+         (sm, s, tcp->flags, tcp->ack_number, tcp->seq_number, thread_index))
        return;
     }
 
@@ -425,15 +425,15 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
 }
 
 static inline void
-create_bypass_for_fwd_worker (snat_main_t * sm, ip4_header_t * ip,
-                             u32 rx_fib_index)
+create_bypass_for_fwd_worker (snat_main_t * sm, vlib_buffer_t * b,
+                             ip4_header_t * ip, u32 rx_fib_index)
 {
   ip4_header_t ip_wkr = {
     .src_address = ip->dst_address,
   };
   u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0);
 
-  create_bypass_for_fwd (sm, ip, rx_fib_index, thread_index);
+  create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
 }
 
 #ifndef CLIB_MARCH_VARIANT
@@ -444,7 +444,6 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
                      u8 * p_dont_translate, void *d, void *e)
 {
   u32 next = ~0, sw_if_index, rx_fib_index;
-  icmp46_header_t *icmp;
   nat_ed_ses_key_t key;
   clib_bihash_kv_16_8_t kv, value;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -452,11 +451,10 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
   u8 dont_translate = 0, is_addr_only, identity_nat;
   snat_session_key_t e_key, l_key;
 
-  icmp = (icmp46_header_t *) ip4_next_header (ip);
   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
-  if (get_icmp_o2i_ed_key (ip, &key))
+  if (get_icmp_o2i_ed_key (b, ip, &key))
     {
       b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
       next = NAT_NEXT_DROP;
@@ -499,15 +497,18 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
                  goto out;
                }
              if (sm->num_workers > 1)
-               create_bypass_for_fwd_worker (sm, ip, rx_fib_index);
+               create_bypass_for_fwd_worker (sm, b, ip, rx_fib_index);
              else
-               create_bypass_for_fwd (sm, ip, rx_fib_index, thread_index);
+               create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
              goto out;
            }
        }
 
-      if (PREDICT_FALSE (icmp->type != ICMP4_echo_reply &&
-                        (icmp->type != ICMP4_echo_request || !is_addr_only)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+              ICMP4_echo_request || !is_addr_only)))
        {
          b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
          next = NAT_NEXT_DROP;
@@ -534,9 +535,13 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
     }
   else
     {
-      if (PREDICT_FALSE (icmp->type != ICMP4_echo_reply &&
-                        icmp->type != ICMP4_echo_request &&
-                        !icmp_is_error_message (icmp)))
+      if (PREDICT_FALSE
+         (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_reply
+          && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
+          ICMP4_echo_request
+          && !icmp_type_is_error_message (vnet_buffer (b)->ip.
+                                          reass.icmp_type_or_tcp_flags)))
        {
          b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
          next = NAT_NEXT_DROP;
@@ -795,13 +800,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  goto trace00;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = NAT_NEXT_OUT2IN_ED_REASS;
-                 fragments++;
-                 goto trace00;
-               }
-
              if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                {
                  next0 = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
@@ -810,8 +808,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address,
-                     ip0->protocol, rx_fib_index0, udp0->dst_port,
-                     udp0->src_port);
+                     ip0->protocol, rx_fib_index0,
+                     vnet_buffer (b0)->ip.reass.l4_dst_port,
+                     vnet_buffer (b0)->ip.reass.l4_src_port);
 
          if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
            {
@@ -820,7 +819,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  /* Try to match static mapping by external address and port,
                     destination address and port in packet */
                  e_key0.addr = ip0->dst_address;
-                 e_key0.port = udp0->dst_port;
+                 e_key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port;
                  e_key0.protocol = proto0;
                  e_key0.fib_index = rx_fib_index0;
                  if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0,
@@ -832,10 +831,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                       * Send DHCP packets to the ipv4 stack, or we won't
                       * be able to use dhcp client on the outside interface
                       */
-                     if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                        && (udp0->dst_port ==
-                                            clib_host_to_net_u16
-                                            (UDP_DST_PORT_dhcp_to_client))))
+                     if (PREDICT_FALSE
+                         (proto0 == SNAT_PROTOCOL_UDP
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+                              clib_host_to_net_u16
+                              (UDP_DST_PORT_dhcp_to_client))))
                        {
                          goto trace00;
                        }
@@ -848,18 +848,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                        }
                      else
                        {
-                         if (next_src_nat (sm, ip0, ip0->protocol,
-                                           udp0->src_port, udp0->dst_port,
-                                           thread_index, rx_fib_index0))
+                         if (next_src_nat
+                             (sm, ip0, ip0->protocol,
+                              vnet_buffer (b0)->ip.reass.l4_src_port,
+                              vnet_buffer (b0)->ip.reass.l4_dst_port,
+                              thread_index, rx_fib_index0))
                            {
                              next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH;
                              goto trace00;
                            }
                          if (sm->num_workers > 1)
-                           create_bypass_for_fwd_worker (sm, ip0,
+                           create_bypass_for_fwd_worker (sm, b0, ip0,
                                                          rx_fib_index0);
                          else
-                           create_bypass_for_fwd (sm, ip0, rx_fib_index0,
+                           create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
                                                   thread_index);
                        }
                      goto trace00;
@@ -868,7 +870,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  if (PREDICT_FALSE (identity_nat0))
                    goto trace00;
 
-                 if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0))
+                 if ((proto0 == SNAT_PROTOCOL_TCP)
+                     && !tcp_flags_is_init (vnet_buffer (b0)->ip.
+                                            reass.icmp_type_or_tcp_flags))
                    {
                      b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
                      next0 = NAT_NEXT_DROP;
@@ -912,35 +916,47 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                                   src_address);
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->dst_port;
-         new_port0 = udp0->dst_port = s0->in2out.port;
+         old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
 
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                    dst_address);
-             sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                    length);
-             if (is_twice_nat_session (s0))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
-                                        s0->ext_host_nat_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->src_port,
-                                        s0->ext_host_nat_port, ip4_header_t,
-                                        length);
-                 tcp0->src_port = s0->ext_host_nat_port;
-                 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 sum0 = tcp0->checksum;
+                 sum0 =
+                   ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                   dst_address);
+                 sum0 =
+                   ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
+                                   length);
+                 if (is_twice_nat_session (s0))
+                   {
+                     sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
+                                            s0->ext_host_nat_addr.as_u32,
+                                            ip4_header_t, dst_address);
+                     sum0 =
+                       ip_csum_update (sum0,
+                                       vnet_buffer (b0)->ip.
+                                       reass.l4_src_port,
+                                       s0->ext_host_nat_port, ip4_header_t,
+                                       length);
+                     tcp0->src_port = s0->ext_host_nat_port;
+                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                   }
+                 tcp0->checksum = ip_csum_fold (sum0);
                }
-             tcp0->checksum = ip_csum_fold (sum0);
              tcp_packets++;
              if (nat44_set_tcp_session_state_o2i
-                 (sm, s0, tcp0, thread_index))
+                 (sm, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+                  vnet_buffer (b0)->ip.reass.tcp_ack_number,
+                  vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
                goto trace00;
            }
-         else if (udp0->checksum)
+         else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
+                  && udp0->checksum)
            {
+             new_port0 = udp0->dst_port = s0->in2out.port;
              sum0 = udp0->checksum;
              sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
                                     dst_address);
@@ -951,9 +967,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
                                         s0->ext_host_nat_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, udp0->src_port,
-                                        s0->ext_host_nat_port, ip4_header_t,
-                                        length);
+                 sum0 =
+                   ip_csum_update (sum0,
+                                   vnet_buffer (b0)->ip.reass.l4_src_port,
+                                   s0->ext_host_nat_port, ip4_header_t,
+                                   length);
                  udp0->src_port = s0->ext_host_nat_port;
                  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
                }
@@ -962,10 +980,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 udp0->src_port = s0->ext_host_nat_port;
-                 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     udp0->dst_port = s0->in2out.port;
+                     if (is_twice_nat_session (s0))
+                       {
+                         udp0->src_port = s0->ext_host_nat_port;
+                         ip0->src_address.as_u32 =
+                           s0->ext_host_nat_addr.as_u32;
+                       }
+                     udp0->checksum = 0;
+                   }
                }
              udp_packets++;
            }
@@ -1050,13 +1078,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  goto trace01;
                }
 
-             if (ip4_is_fragment (ip1))
-               {
-                 next1 = NAT_NEXT_OUT2IN_ED_REASS;
-                 fragments++;
-                 goto trace01;
-               }
-
              if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
                {
                  next1 = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
@@ -1065,8 +1086,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv1, &ip1->dst_address, &ip1->src_address,
-                     ip1->protocol, rx_fib_index1, udp1->dst_port,
-                     udp1->src_port);
+                     ip1->protocol, rx_fib_index1,
+                     vnet_buffer (b1)->ip.reass.l4_dst_port,
+                     vnet_buffer (b1)->ip.reass.l4_src_port);
 
          if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv1, &value1))
            {
@@ -1075,7 +1097,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  /* Try to match static mapping by external address and port,
                     destination address and port in packet */
                  e_key1.addr = ip1->dst_address;
-                 e_key1.port = udp1->dst_port;
+                 e_key1.port = vnet_buffer (b1)->ip.reass.l4_dst_port;
                  e_key1.protocol = proto1;
                  e_key1.fib_index = rx_fib_index1;
                  if (snat_static_mapping_match (sm, e_key1, &l_key1, 1, 0,
@@ -1087,10 +1109,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                       * Send DHCP packets to the ipv4 stack, or we won't
                       * be able to use dhcp client on the outside interface
                       */
-                     if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP
-                                        && (udp1->dst_port ==
-                                            clib_host_to_net_u16
-                                            (UDP_DST_PORT_dhcp_to_client))))
+                     if (PREDICT_FALSE
+                         (proto1 == SNAT_PROTOCOL_UDP
+                          && (vnet_buffer (b1)->ip.reass.l4_dst_port ==
+                              clib_host_to_net_u16
+                              (UDP_DST_PORT_dhcp_to_client))))
                        {
                          goto trace01;
                        }
@@ -1103,18 +1126,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                        }
                      else
                        {
-                         if (next_src_nat (sm, ip1, ip1->protocol,
-                                           udp1->src_port, udp1->dst_port,
-                                           thread_index, rx_fib_index1))
+                         if (next_src_nat
+                             (sm, ip1, ip1->protocol,
+                              vnet_buffer (b1)->ip.reass.l4_src_port,
+                              vnet_buffer (b1)->ip.reass.l4_dst_port,
+                              thread_index, rx_fib_index1))
                            {
                              next1 = NAT_NEXT_IN2OUT_ED_FAST_PATH;
                              goto trace01;
                            }
                          if (sm->num_workers > 1)
-                           create_bypass_for_fwd_worker (sm, ip1,
+                           create_bypass_for_fwd_worker (sm, b1, ip1,
                                                          rx_fib_index1);
                          else
-                           create_bypass_for_fwd (sm, ip1, rx_fib_index1,
+                           create_bypass_for_fwd (sm, b1, ip1, rx_fib_index1,
                                                   thread_index);
                        }
                      goto trace01;
@@ -1123,7 +1148,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  if (PREDICT_FALSE (identity_nat1))
                    goto trace01;
 
-                 if ((proto1 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp1))
+                 if ((proto1 == SNAT_PROTOCOL_TCP)
+                     && !tcp_flags_is_init (vnet_buffer (b1)->ip.
+                                            reass.icmp_type_or_tcp_flags))
                    {
                      b1->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
                      next1 = NAT_NEXT_DROP;
@@ -1167,35 +1194,48 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                                   src_address);
          ip1->checksum = ip_csum_fold (sum1);
 
-         old_port1 = udp1->dst_port;
-         new_port1 = udp1->dst_port = s1->in2out.port;
+         old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port;
 
          if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP))
            {
-             sum1 = tcp1->checksum;
-             sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
-                                    dst_address);
-             sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
-                                    length);
-             if (is_twice_nat_session (s1))
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
                {
-                 sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
-                                        s1->ext_host_nat_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum1 = ip_csum_update (sum1, tcp1->src_port,
-                                        s1->ext_host_nat_port, ip4_header_t,
-                                        length);
-                 tcp1->src_port = s1->ext_host_nat_port;
-                 ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
+                 new_port1 = udp1->dst_port = s1->in2out.port;
+
+                 sum1 = tcp1->checksum;
+                 sum1 =
+                   ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
+                                   dst_address);
+                 sum1 =
+                   ip_csum_update (sum1, old_port1, new_port1, ip4_header_t,
+                                   length);
+                 if (is_twice_nat_session (s1))
+                   {
+                     sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
+                                            s1->ext_host_nat_addr.as_u32,
+                                            ip4_header_t, dst_address);
+                     sum1 =
+                       ip_csum_update (sum1,
+                                       vnet_buffer (b1)->ip.
+                                       reass.l4_src_port,
+                                       s1->ext_host_nat_port, ip4_header_t,
+                                       length);
+                     tcp1->src_port = s1->ext_host_nat_port;
+                     ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
+                   }
+                 tcp1->checksum = ip_csum_fold (sum1);
                }
-             tcp1->checksum = ip_csum_fold (sum1);
              tcp_packets++;
              if (nat44_set_tcp_session_state_o2i
-                 (sm, s1, tcp1, thread_index))
+                 (sm, s1, vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags,
+                  vnet_buffer (b1)->ip.reass.tcp_ack_number,
+                  vnet_buffer (b1)->ip.reass.tcp_seq_number, thread_index))
                goto trace01;
            }
-         else if (udp1->checksum)
+         else if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment
+                  && udp1->checksum)
            {
+             new_port1 = udp1->dst_port = s1->in2out.port;
              sum1 = udp1->checksum;
              sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t,
                                     dst_address);
@@ -1206,9 +1246,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  sum1 = ip_csum_update (sum1, ip1->src_address.as_u32,
                                         s1->ext_host_nat_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum1 = ip_csum_update (sum1, udp1->src_port,
-                                        s1->ext_host_nat_port, ip4_header_t,
-                                        length);
+                 sum1 =
+                   ip_csum_update (sum1,
+                                   vnet_buffer (b1)->ip.reass.l4_src_port,
+                                   s1->ext_host_nat_port, ip4_header_t,
+                                   length);
                  udp1->src_port = s1->ext_host_nat_port;
                  ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
                }
@@ -1217,10 +1259,19 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s1)))
+             if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment)
                {
-                 udp1->src_port = s1->ext_host_nat_port;
-                 ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32;
+                 if (PREDICT_FALSE (is_twice_nat_session (s1)))
+                   {
+                     udp1->dst_port = s1->in2out.port;
+                     if (is_twice_nat_session (s1))
+                       {
+                         udp1->src_port = s1->ext_host_nat_port;
+                         ip1->src_address.as_u32 =
+                           s1->ext_host_nat_addr.as_u32;
+                       }
+                     udp1->checksum = 0;
+                   }
                }
              udp_packets++;
            }
@@ -1341,13 +1392,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  goto trace0;
                }
 
-             if (ip4_is_fragment (ip0))
-               {
-                 next0 = NAT_NEXT_OUT2IN_ED_REASS;
-                 fragments++;
-                 goto trace0;
-               }
-
              if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                {
                  next0 = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
@@ -1356,8 +1400,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
 
          make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address,
-                     ip0->protocol, rx_fib_index0, udp0->dst_port,
-                     udp0->src_port);
+                     ip0->protocol, rx_fib_index0,
+                     vnet_buffer (b0)->ip.reass.l4_dst_port,
+                     vnet_buffer (b0)->ip.reass.l4_src_port);
 
          if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0))
            {
@@ -1366,7 +1411,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  /* Try to match static mapping by external address and port,
                     destination address and port in packet */
                  e_key0.addr = ip0->dst_address;
-                 e_key0.port = udp0->dst_port;
+                 e_key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port;
                  e_key0.protocol = proto0;
                  e_key0.fib_index = rx_fib_index0;
                  if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0,
@@ -1378,10 +1423,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                       * Send DHCP packets to the ipv4 stack, or we won't
                       * be able to use dhcp client on the outside interface
                       */
-                     if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                        && (udp0->dst_port ==
-                                            clib_host_to_net_u16
-                                            (UDP_DST_PORT_dhcp_to_client))))
+                     if (PREDICT_FALSE
+                         (proto0 == SNAT_PROTOCOL_UDP
+                          && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+                              clib_host_to_net_u16
+                              (UDP_DST_PORT_dhcp_to_client))))
                        {
                          goto trace0;
                        }
@@ -1394,18 +1440,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                        }
                      else
                        {
-                         if (next_src_nat (sm, ip0, ip0->protocol,
-                                           udp0->src_port, udp0->dst_port,
-                                           thread_index, rx_fib_index0))
+                         if (next_src_nat
+                             (sm, ip0, ip0->protocol,
+                              vnet_buffer (b0)->ip.reass.l4_src_port,
+                              vnet_buffer (b0)->ip.reass.l4_dst_port,
+                              thread_index, rx_fib_index0))
                            {
                              next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH;
                              goto trace0;
                            }
                          if (sm->num_workers > 1)
-                           create_bypass_for_fwd_worker (sm, ip0,
+                           create_bypass_for_fwd_worker (sm, b0, ip0,
                                                          rx_fib_index0);
                          else
-                           create_bypass_for_fwd (sm, ip0, rx_fib_index0,
+                           create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
                                                   thread_index);
                        }
                      goto trace0;
@@ -1414,7 +1462,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  if (PREDICT_FALSE (identity_nat0))
                    goto trace0;
 
-                 if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0))
+                 if ((proto0 == SNAT_PROTOCOL_TCP)
+                     && !tcp_flags_is_init (vnet_buffer (b0)->ip.
+                                            reass.icmp_type_or_tcp_flags))
                    {
                      b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
                      next0 = NAT_NEXT_DROP;
@@ -1458,35 +1508,47 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                                   src_address);
          ip0->checksum = ip_csum_fold (sum0);
 
-         old_port0 = udp0->dst_port;
-         new_port0 = udp0->dst_port = s0->in2out.port;
+         old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
 
          if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
            {
-             sum0 = tcp0->checksum;
-             sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                    dst_address);
-             sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                    length);
-             if (is_twice_nat_session (s0))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
-                                        s0->ext_host_nat_addr.as_u32,
-                                        ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, tcp0->src_port,
-                                        s0->ext_host_nat_port, ip4_header_t,
-                                        length);
-                 tcp0->src_port = s0->ext_host_nat_port;
-                 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 sum0 = tcp0->checksum;
+                 sum0 =
+                   ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
+                                   dst_address);
+                 sum0 =
+                   ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
+                                   length);
+                 if (is_twice_nat_session (s0))
+                   {
+                     sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
+                                            s0->ext_host_nat_addr.as_u32,
+                                            ip4_header_t, dst_address);
+                     sum0 =
+                       ip_csum_update (sum0,
+                                       vnet_buffer (b0)->ip.
+                                       reass.l4_src_port,
+                                       s0->ext_host_nat_port, ip4_header_t,
+                                       length);
+                     tcp0->src_port = s0->ext_host_nat_port;
+                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                   }
+                 tcp0->checksum = ip_csum_fold (sum0);
                }
-             tcp0->checksum = ip_csum_fold (sum0);
              tcp_packets++;
              if (nat44_set_tcp_session_state_o2i
-                 (sm, s0, tcp0, thread_index))
+                 (sm, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+                  vnet_buffer (b0)->ip.reass.tcp_ack_number,
+                  vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
                goto trace0;
            }
-         else if (udp0->checksum)
+         else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
+                  && udp0->checksum)
            {
+             new_port0 = udp0->dst_port = s0->in2out.port;
              sum0 = udp0->checksum;
              sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
                                     dst_address);
@@ -1497,9 +1559,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
                  sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
                                         s0->ext_host_nat_addr.as_u32,
                                         ip4_header_t, dst_address);
-                 sum0 = ip_csum_update (sum0, udp0->src_port,
-                                        s0->ext_host_nat_port, ip4_header_t,
-                                        length);
+                 sum0 =
+                   ip_csum_update (sum0,
+                                   vnet_buffer (b0)->ip.reass.l4_src_port,
+                                   s0->ext_host_nat_port, ip4_header_t,
+                                   length);
                  udp0->src_port = s0->ext_host_nat_port;
                  ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
                }
@@ -1508,10 +1572,14 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (PREDICT_FALSE (is_twice_nat_session (s0)))
+             if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
                {
-                 udp0->src_port = s0->ext_host_nat_port;
-                 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                 new_port0 = udp0->dst_port = s0->in2out.port;
+                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
+                   {
+                     udp0->src_port = s0->ext_host_nat_port;
+                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
+                   }
                }
              udp_packets++;
            }
@@ -1565,380 +1633,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
-static inline uword
-nat44_ed_out2in_reass_node_fn_inline (vlib_main_t * vm,
-                                     vlib_node_runtime_t * node,
-                                     vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, *to_next;
-  nat_next_t next_index;
-  u32 pkts_processed = 0;
-  snat_main_t *sm = &snat_main;
-  f64 now = vlib_time_now (vm);
-  u32 thread_index = vm->thread_index;
-  snat_main_per_thread_data_t *per_thread_data =
-    &sm->per_thread_data[thread_index];
-  u32 *fragments_to_drop = 0;
-  u32 *fragments_to_loopback = 0;
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
-         vlib_buffer_t *b0;
-         u32 next0;
-         u8 cached0 = 0;
-         ip4_header_t *ip0;
-         nat_reass_ip4_t *reass0;
-         udp_header_t *udp0;
-         tcp_header_t *tcp0;
-         icmp46_header_t *icmp0;
-         clib_bihash_kv_16_8_t kv0, value0;
-         snat_session_t *s0 = 0;
-         u16 old_port0, new_port0;
-         ip_csum_t sum0;
-         snat_session_key_t e_key0, l_key0;
-         lb_nat_type_t lb0;
-         twice_nat_type_t twice_nat0;
-         u8 identity_nat0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = nat_buffer_opaque (b0)->arc_next;
-
-         sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-         rx_fib_index0 =
-           fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
-                                                sw_if_index0);
-
-         if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
-           {
-             next0 = NAT_NEXT_DROP;
-             b0->error = node->errors[NAT_OUT2IN_ED_ERROR_DROP_FRAGMENT];
-             goto trace0;
-           }
-
-         ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
-         udp0 = ip4_next_header (ip0);
-         tcp0 = (tcp_header_t *) udp0;
-         icmp0 = (icmp46_header_t *) udp0;
-         proto0 = ip_proto_to_snat_proto (ip0->protocol);
-
-         reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
-                                                ip0->dst_address,
-                                                ip0->fragment_id,
-                                                ip0->protocol,
-                                                1, &fragments_to_drop);
-
-         if (PREDICT_FALSE (!reass0))
-           {
-             next0 = NAT_NEXT_DROP;
-             b0->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_REASS];
-             nat_elog_notice ("maximum reassemblies exceeded");
-             goto trace0;
-           }
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
-               {
-                 next0 = icmp_out2in_ed_slow_path
-                   (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                    next0, now, thread_index, &s0);
-
-                 if (PREDICT_TRUE (next0 != NAT_NEXT_DROP))
-                   {
-                     if (s0)
-                       reass0->sess_index = s0 - per_thread_data->sessions;
-                     else
-                       reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     reass0->thread_index = thread_index;
-                     nat_ip4_reass_get_frags (reass0,
-                                              &fragments_to_loopback);
-                   }
-
-                 goto trace0;
-               }
-
-             make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address,
-                         ip0->protocol, rx_fib_index0, udp0->dst_port,
-                         udp0->src_port);
-
-             if (clib_bihash_search_16_8
-                 (&per_thread_data->out2in_ed, &kv0, &value0))
-               {
-                 /* Try to match static mapping by external address and port,
-                    destination address and port in packet */
-                 e_key0.addr = ip0->dst_address;
-                 e_key0.port = udp0->dst_port;
-                 e_key0.protocol = proto0;
-                 e_key0.fib_index = rx_fib_index0;
-                 if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0,
-                                                &twice_nat0, &lb0, 0,
-                                                &identity_nat0))
-                   {
-                     /*
-                      * Send DHCP packets to the ipv4 stack, or we won't
-                      * be able to use dhcp client on the outside interface
-                      */
-                     if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP
-                                        && (udp0->dst_port
-                                            ==
-                                            clib_host_to_net_u16
-                                            (UDP_DST_PORT_dhcp_to_client))))
-                       {
-                         goto trace0;
-                       }
-
-                     if (!sm->forwarding_enabled)
-                       {
-                         b0->error =
-                           node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
-                         next0 = NAT_NEXT_DROP;
-                       }
-                     else
-                       {
-                         if (next_src_nat (sm, ip0, ip0->protocol,
-                                           udp0->src_port, udp0->dst_port,
-                                           thread_index, rx_fib_index0))
-                           {
-                             next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH;
-                             goto trace0;
-                           }
-                         if (sm->num_workers > 1)
-                           create_bypass_for_fwd_worker (sm, ip0,
-                                                         rx_fib_index0);
-                         else
-                           create_bypass_for_fwd (sm, ip0, rx_fib_index0,
-                                                  thread_index);
-                         reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                         nat_ip4_reass_get_frags (reass0,
-                                                  &fragments_to_loopback);
-                       }
-                     goto trace0;
-                   }
-
-                 if (PREDICT_FALSE (identity_nat0))
-                   {
-                     reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE;
-                     goto trace0;
-                   }
-
-                 if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0))
-                   {
-                     b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
-                     next0 = NAT_NEXT_DROP;
-                     goto trace0;
-                   }
-
-                 /* Create session initiated by host from external network */
-                 s0 = create_session_for_static_mapping_ed (sm, b0, l_key0,
-                                                            e_key0, node,
-                                                            thread_index,
-                                                            twice_nat0, lb0,
-                                                            now);
-                 if (!s0)
-                   {
-                     b0->error =
-                       node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
-                     next0 = NAT_NEXT_DROP;
-                     goto trace0;
-                   }
-                 reass0->sess_index = s0 - per_thread_data->sessions;
-                 reass0->thread_index = thread_index;
-               }
-             else
-               {
-                 s0 = pool_elt_at_index (per_thread_data->sessions,
-                                         value0.value);
-                 reass0->sess_index = value0.value;
-               }
-             nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
-           }
-         else
-           {
-             if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE)
-               goto trace0;
-             if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
-               {
-                 if (nat_ip4_reass_add_fragment
-                     (thread_index, reass0, bi0, &fragments_to_drop))
-                   {
-                     b0->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_FRAG];
-                     nat_elog_notice
-                       ("maximum fragments per reassembly exceeded");
-                     next0 = NAT_NEXT_DROP;
-                     goto trace0;
-                   }
-                 cached0 = 1;
-                 goto trace0;
-               }
-             s0 = pool_elt_at_index (per_thread_data->sessions,
-                                     reass0->sess_index);
-           }
-
-         old_addr0 = ip0->dst_address.as_u32;
-         ip0->dst_address = s0->in2out.addr;
-         new_addr0 = ip0->dst_address.as_u32;
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
-
-         sum0 = ip0->checksum;
-         sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                ip4_header_t,
-                                dst_address /* changed member */ );
-         if (PREDICT_FALSE (is_twice_nat_session (s0)))
-           sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
-                                  s0->ext_host_nat_addr.as_u32, ip4_header_t,
-                                  src_address);
-         ip0->checksum = ip_csum_fold (sum0);
-
-         if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
-           {
-             old_port0 = udp0->dst_port;
-             new_port0 = udp0->dst_port = s0->in2out.port;
-
-             if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
-               {
-                 sum0 = tcp0->checksum;
-                 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
-                                        ip4_header_t,
-                                        dst_address /* changed member */ );
-
-                 sum0 = ip_csum_update (sum0, old_port0, new_port0,
-                                        ip4_header_t /* cheat */ ,
-                                        length /* changed member */ );
-                 if (is_twice_nat_session (s0))
-                   {
-                     sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
-                                            s0->ext_host_nat_addr.as_u32,
-                                            ip4_header_t, dst_address);
-                     sum0 = ip_csum_update (sum0, tcp0->src_port,
-                                            s0->ext_host_nat_port,
-                                            ip4_header_t, length);
-                     tcp0->src_port = s0->ext_host_nat_port;
-                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
-                   }
-                 tcp0->checksum = ip_csum_fold (sum0);
-               }
-             else if (udp0->checksum)
-               {
-                 sum0 = udp0->checksum;
-                 sum0 =
-                   ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
-                                   dst_address);
-                 sum0 =
-                   ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
-                                   length);
-                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
-                   {
-                     sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
-                                            s0->ext_host_nat_addr.as_u32,
-                                            ip4_header_t, dst_address);
-                     sum0 = ip_csum_update (sum0, udp0->src_port,
-                                            s0->ext_host_nat_port,
-                                            ip4_header_t, length);
-                     udp0->src_port = s0->ext_host_nat_port;
-                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
-                   }
-                 udp0->checksum = ip_csum_fold (sum0);
-               }
-             else
-               {
-                 if (PREDICT_FALSE (is_twice_nat_session (s0)))
-                   {
-                     udp0->src_port = s0->ext_host_nat_port;
-                     ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
-                   }
-               }
-           }
-
-         /* Accounting */
-         nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm, b0),
-                                        thread_index);
-         /* Per-user LRU list maintenance */
-         nat44_session_update_lru (sm, s0, thread_index);
-
-       trace0:
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             nat44_reass_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-             t->cached = cached0;
-             t->sw_if_index = sw_if_index0;
-             t->next_index = next0;
-           }
-
-         if (cached0)
-           {
-             n_left_to_next++;
-             to_next--;
-           }
-         else
-           {
-             pkts_processed += next0 != NAT_NEXT_DROP;
-
-             /* verify speculative enqueue, maybe switch current next frame */
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                              to_next, n_left_to_next,
-                                              bi0, next0);
-           }
-
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, sm->ed_out2in_reass_node_index,
-                              NAT_OUT2IN_ED_ERROR_OUT2IN_PACKETS,
-                              pkts_processed);
-
-  nat_send_all_to_node (vm, fragments_to_drop, node,
-                       &node->errors[NAT_OUT2IN_ED_ERROR_DROP_FRAGMENT],
-                       NAT_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
                                     vlib_node_runtime_t * node,
                                     vlib_frame_t * frame)
@@ -1979,25 +1673,6 @@ VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FN (nat44_ed_out2in_reass_node) (vlib_main_t * vm,
-                                          vlib_node_runtime_t * node,
-                                          vlib_frame_t * frame)
-{
-  return nat44_ed_out2in_reass_node_fn_inline (vm, node, frame);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_ed_out2in_reass_node) = {
-  .name = "nat44-ed-out2in-reass",
-  .vector_size = sizeof (u32),
-  .sibling_of = "nat-default",
-  .format_trace = format_nat44_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
-  .error_strings = nat_out2in_ed_error_strings,
-};
-/* *INDENT-ON* */
-
 static u8 *
 format_nat_pre_trace (u8 * s, va_list * args)
 {
index 0daa610..46b97c0 100644 (file)
@@ -31,6 +31,7 @@ from scapy.all import bind_layers, Packet, ByteEnumField, ShortField, \
     IPField, IntField, LongField, XByteField, FlagsField, FieldLenField, \
     PacketListField
 from ipaddress import IPv6Network
+from util import ppc, ppp
 
 
 # NAT HA protocol event data
@@ -168,10 +169,6 @@ class MethodHolder(VppTestCase):
                 last_ip_address=addr.ip_address,
                 vrf_id=0xFFFFFFFF, flags=addr.flags)
 
-        self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5,
-                                drop_frag=0)
-        self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5,
-                                drop_frag=0, is_ip6=1)
         self.verify_no_nat44_user()
         self.vapi.nat_set_timeouts(udp=300, tcp_established=7440,
                                    tcp_transitory=240, icmp=60)
@@ -762,6 +759,7 @@ class MethodHolder(VppTestCase):
                 proto=frags[0][IP].proto)
         if ip.proto == IP_PROTOS.tcp:
             p = (ip / TCP(buffer.getvalue()))
+            self.logger.debug(ppp("Reassembled:", p))
             self.assert_tcp_checksum_valid(p)
         elif ip.proto == IP_PROTOS.udp:
             p = (ip / UDP(buffer.getvalue()[:8]) /
@@ -792,6 +790,7 @@ class MethodHolder(VppTestCase):
             p = (ip / TCP(buffer.getvalue()))
         elif ip.nh == IP_PROTOS.udp:
             p = (ip / UDP(buffer.getvalue()))
+        self.logger.debug(ppp("Reassembled:", p))
         self.assert_packet_checksums_valid(p)
         return p
 
@@ -1154,9 +1153,6 @@ class MethodHolder(VppTestCase):
             data = b"A" * 16 + b"B" * 16 + b"C" * 3
         self.port_in = random.randint(1025, 65535)
 
-        reass = self.vapi.nat_reass_dump()
-        reass_n_start = len(reass)
-
         # in2out
         pkts = self.create_stream_frag(self.pg0,
                                        self.pg1.remote_ip4,
@@ -1221,11 +1217,6 @@ class MethodHolder(VppTestCase):
             self.assertEqual(p[layer].id, self.port_in)
         self.assertEqual(data, p[Raw].load)
 
-        reass = self.vapi.nat_reass_dump()
-        reass_n_end = len(reass)
-
-        self.assertEqual(reass_n_end - reass_n_start, 2)
-
     def frag_in_order_in_plus_out(self, proto=IP_PROTOS.tcp):
         layer = self.proto2layer(proto)
 
@@ -1236,9 +1227,6 @@ class MethodHolder(VppTestCase):
         self.port_in = random.randint(1025, 65535)
 
         for i in range(2):
-            reass = self.vapi.nat_reass_dump()
-            reass_n_start = len(reass)
-
             # out2in
             pkts = self.create_stream_frag(self.pg0,
                                            self.server_out_addr,
@@ -1290,11 +1278,6 @@ class MethodHolder(VppTestCase):
                 self.assertEqual(p[layer].id, self.port_in)
             self.assertEqual(data, p[Raw].load)
 
-            reass = self.vapi.nat_reass_dump()
-            reass_n_end = len(reass)
-
-            self.assertEqual(reass_n_end - reass_n_start, 2)
-
     def reass_hairpinning(self, proto=IP_PROTOS.tcp):
         layer = self.proto2layer(proto)
 
@@ -3581,25 +3564,6 @@ class TestNAT44(MethodHolder):
 
         self.verify_no_nat44_user()
 
-    def test_set_get_reass(self):
-        """ NAT44 set/get virtual fragmentation reassembly """
-        reas_cfg1 = self.vapi.nat_get_reass()
-
-        self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout + 5,
-                                max_reass=reas_cfg1.ip4_max_reass * 2,
-                                max_frag=reas_cfg1.ip4_max_frag * 2,
-                                drop_frag=0)
-
-        reas_cfg2 = self.vapi.nat_get_reass()
-
-        self.assertEqual(reas_cfg1.ip4_timeout + 5, reas_cfg2.ip4_timeout)
-        self.assertEqual(reas_cfg1.ip4_max_reass * 2, reas_cfg2.ip4_max_reass)
-        self.assertEqual(reas_cfg1.ip4_max_frag * 2, reas_cfg2.ip4_max_frag)
-
-        self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5,
-                                drop_frag=1)
-        self.assertTrue(self.vapi.nat_get_reass().ip4_drop_frag)
-
     def test_frag_in_order(self):
         """ NAT44 translate fragments arriving in order """
 
@@ -3612,22 +3576,10 @@ class TestNAT44(MethodHolder):
             sw_if_index=self.pg1.sw_if_index,
             is_add=1)
 
-        reas_cfg1 = self.vapi.nat_get_reass()
-        # this test was intermittently failing in some cases
-        # until we temporarily bump the reassembly timeouts
-        self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5,
-                                drop_frag=0)
-
         self.frag_in_order(proto=IP_PROTOS.tcp)
         self.frag_in_order(proto=IP_PROTOS.udp)
         self.frag_in_order(proto=IP_PROTOS.icmp)
 
-        # restore the reassembly timeouts
-        self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout,
-                                max_reass=reas_cfg1.ip4_max_reass,
-                                max_frag=reas_cfg1.ip4_max_frag,
-                                drop_frag=reas_cfg1.ip4_drop_frag)
-
     def test_frag_forwarding(self):
         """ NAT44 forwarding fragment test """
         self.vapi.nat44_add_del_interface_addr(
@@ -3772,60 +3724,6 @@ class TestNAT44(MethodHolder):
             self.assertGreaterEqual(tcp.sport, 1025)
             self.assertLessEqual(tcp.sport, 1027)
 
-    def test_ipfix_max_frags(self):
-        """ IPFIX logging maximum fragments pending reassembly exceeded """
-        self.nat44_add_address(self.nat_addr)
-        flags = self.config_flags.NAT_IS_INSIDE
-        self.vapi.nat44_interface_add_del_feature(
-            sw_if_index=self.pg0.sw_if_index,
-            flags=flags, is_add=1)
-        self.vapi.nat44_interface_add_del_feature(
-            sw_if_index=self.pg1.sw_if_index,
-            is_add=1)
-        self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=1,
-                                drop_frag=0)
-        self.vapi.set_ipfix_exporter(collector_address=self.pg3.remote_ip4,
-                                     src_address=self.pg3.local_ip4,
-                                     path_mtu=512,
-                                     template_interval=10)
-        self.vapi.nat_ipfix_enable_disable(domain_id=self.ipfix_domain_id,
-                                           src_port=self.ipfix_src_port,
-                                           enable=1)
-
-        data = b"A" * 4 + b"B" * 16 + b"C" * 3
-        self.tcp_port_in = random.randint(1025, 65535)
-        pkts = self.create_stream_frag(self.pg0,
-                                       self.pg1.remote_ip4,
-                                       self.tcp_port_in,
-                                       20,
-                                       data)
-        pkts.reverse()
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        self.pg1.assert_nothing_captured()
-        sleep(1)
-        self.vapi.ipfix_flush()
-        capture = self.pg3.get_capture(9)
-        ipfix = IPFIXDecoder()
-        # first load template
-        for p in capture:
-            self.assertTrue(p.haslayer(IPFIX))
-            self.assertEqual(p[IP].src, self.pg3.local_ip4)
-            self.assertEqual(p[IP].dst, self.pg3.remote_ip4)
-            self.assertEqual(p[UDP].sport, self.ipfix_src_port)
-            self.assertEqual(p[UDP].dport, 4739)
-            self.assertEqual(p[IPFIX].observationDomainID,
-                             self.ipfix_domain_id)
-            if p.haslayer(Template):
-                ipfix.add_template(p.getlayer(Template))
-        # verify events in data set
-        for p in capture:
-            if p.haslayer(Data):
-                data = ipfix.decode_data_set(p.getlayer(Set))
-                self.verify_ipfix_max_fragments_ip4(data, 1,
-                                                    self.pg0.remote_ip4n)
-
     def test_multiple_outside_vrf(self):
         """ Multiple outside VRF """
         vrf_id1 = 1
@@ -4323,7 +4221,6 @@ class TestNAT44(MethodHolder):
         self.logger.info(self.vapi.cli("show nat44 static mappings"))
         self.logger.info(self.vapi.cli("show nat44 interface address"))
         self.logger.info(self.vapi.cli("show nat44 sessions detail"))
-        self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
         self.logger.info(self.vapi.cli("show nat44 hash tables detail"))
         self.logger.info(self.vapi.cli("show nat timeouts"))
         self.logger.info(
@@ -4565,17 +4462,7 @@ class TestNAT44EndpointDependent(MethodHolder):
             sw_if_index=self.pg1.sw_if_index,
             is_add=1)
         self.vapi.nat44_forwarding_enable_disable(enable=True)
-        reas_cfg1 = self.vapi.nat_get_reass()
-        # this test was intermittently failing in some cases
-        # until we temporarily bump the reassembly timeouts
-        self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5,
-                                drop_frag=0)
         self.frag_in_order(proto=IP_PROTOS.tcp, dont_translate=True)
-        # restore the reassembly timeouts
-        self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout,
-                                max_reass=reas_cfg1.ip4_max_reass,
-                                max_frag=reas_cfg1.ip4_max_frag,
-                                drop_frag=reas_cfg1.ip4_drop_frag)
 
     def test_frag_out_of_order(self):
         """ NAT44 translate fragments arriving out of order """
@@ -4643,9 +4530,6 @@ class TestNAT44EndpointDependent(MethodHolder):
                                       self.server_out_addr,
                                       proto=IP_PROTOS.icmp)
 
-        self.vapi.nat_set_reass(timeout=10, max_reass=1024, max_frag=5,
-                                drop_frag=0)
-
         self.frag_in_order_in_plus_out(proto=IP_PROTOS.tcp)
         self.frag_in_order_in_plus_out(proto=IP_PROTOS.udp)
         self.frag_in_order_in_plus_out(proto=IP_PROTOS.icmp)
@@ -4690,9 +4574,6 @@ class TestNAT44EndpointDependent(MethodHolder):
                                       self.server_out_addr,
                                       proto=IP_PROTOS.icmp)
 
-        self.vapi.nat_set_reass(timeout=10, max_reass=1024, max_frag=5,
-                                drop_frag=0)
-
         self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.tcp)
         self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.udp)
         self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.icmp)
@@ -8756,9 +8637,6 @@ class TestNAT64(MethodHolder):
         self.vapi.nat64_add_del_interface(is_add=1, flags=0,
                                           sw_if_index=self.pg1.sw_if_index)
 
-        reass = self.vapi.nat_reass_dump()
-        reass_n_start = len(reass)
-
         # in2out
         data = b'a' * 200
         pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4,
@@ -8786,17 +8664,13 @@ class TestNAT64(MethodHolder):
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start()
         frags = self.pg0.get_capture(len(pkts))
+        self.logger.debug(ppc("Captured:", frags))
         src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96)
         p = self.reass_frags_and_verify_ip6(frags, src, self.pg0.remote_ip6)
         self.assertEqual(p[TCP].sport, 20)
         self.assertEqual(p[TCP].dport, self.tcp_port_in)
         self.assertEqual(data, p[Raw].load)
 
-        reass = self.vapi.nat_reass_dump()
-        reass_n_end = len(reass)
-
-        self.assertEqual(reass_n_end - reass_n_start, 2)
-
     def test_reass_hairpinning(self):
         """ NAT64 fragments hairpinning """
         data = b'a' * 200
@@ -8835,6 +8709,7 @@ class TestNAT64(MethodHolder):
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start()
         frags = self.pg0.get_capture(len(pkts))
+        self.logger.debug(ppc("Captured:", frags))
         p = self.reass_frags_and_verify_ip6(frags, nat_addr_ip6, server.ip6)
         self.assertNotEqual(p[TCP].sport, client_in_port)
         self.assertEqual(p[TCP].dport, server_in_port)
@@ -9007,57 +8882,6 @@ class TestNAT64(MethodHolder):
                 data = ipfix.decode_data_set(p.getlayer(Set))
                 self.verify_ipfix_max_bibs(data, max_bibs)
 
-    def test_ipfix_max_frags(self):
-        """ IPFIX logging maximum fragments pending reassembly exceeded """
-        self.vapi.nat64_add_del_pool_addr_range(start_addr=self.nat_addr,
-                                                end_addr=self.nat_addr,
-                                                vrf_id=0xFFFFFFFF,
-                                                is_add=1)
-        flags = self.config_flags.NAT_IS_INSIDE
-        self.vapi.nat64_add_del_interface(is_add=1, flags=flags,
-                                          sw_if_index=self.pg0.sw_if_index)
-        self.vapi.nat64_add_del_interface(is_add=1, flags=0,
-                                          sw_if_index=self.pg1.sw_if_index)
-        self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=1,
-                                drop_frag=0, is_ip6=1)
-        self.vapi.set_ipfix_exporter(collector_address=self.pg3.remote_ip4,
-                                     src_address=self.pg3.local_ip4,
-                                     path_mtu=512,
-                                     template_interval=10)
-        self.vapi.nat_ipfix_enable_disable(domain_id=self.ipfix_domain_id,
-                                           src_port=self.ipfix_src_port,
-                                           enable=1)
-
-        data = b'a' * 200
-        pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4,
-                                           self.tcp_port_in, 20, data)
-        pkts.reverse()
-        self.pg0.add_stream(pkts)
-        self.pg_enable_capture(self.pg_interfaces)
-        self.pg_start()
-        self.pg1.assert_nothing_captured()
-        sleep(1)
-        self.vapi.ipfix_flush()
-        capture = self.pg3.get_capture(9)
-        ipfix = IPFIXDecoder()
-        # first load template
-        for p in capture:
-            self.assertTrue(p.haslayer(IPFIX))
-            self.assertEqual(p[IP].src, self.pg3.local_ip4)
-            self.assertEqual(p[IP].dst, self.pg3.remote_ip4)
-            self.assertEqual(p[UDP].sport, self.ipfix_src_port)
-            self.assertEqual(p[UDP].dport, 4739)
-            self.assertEqual(p[IPFIX].observationDomainID,
-                             self.ipfix_domain_id)
-            if p.haslayer(Template):
-                ipfix.add_template(p.getlayer(Template))
-        # verify events in data set
-        for p in capture:
-            if p.haslayer(Data):
-                data = ipfix.decode_data_set(p.getlayer(Set))
-                self.verify_ipfix_max_fragments_ip6(data, 1,
-                                                    self.pg0.remote_ip6n)
-
     def test_ipfix_bib_ses(self):
         """ IPFIX logging NAT64 BIB/session create and delete events """
         self.tcp_port_in = random.randint(1025, 65535)
@@ -9257,7 +9081,6 @@ class TestNAT64(MethodHolder):
         self.logger.info(self.vapi.cli("show nat64 prefix"))
         self.logger.info(self.vapi.cli("show nat64 bib all"))
         self.logger.info(self.vapi.cli("show nat64 session table all"))
-        self.logger.info(self.vapi.cli("show nat virtual-reassembly"))
 
 
 class TestDSlite(MethodHolder):
@@ -9625,6 +9448,7 @@ class TestNAT66(MethodHolder):
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start()
         capture = self.pg1.get_capture(len(pkts))
+
         for packet in capture:
             try:
                 self.assertEqual(packet[IPv6].src, self.nat_addr)
index 59a8256..b174587 100644 (file)
@@ -185,9 +185,8 @@ typedef struct
        /* reassembly */
        union
        {
-         /* group input/output/handoff to simplify the code, this way:
-          * we can handoff while keeping input variables intact
-          * and also we can write the output and still use next_index later */
+         /* group input/output to simplify the code, this way
+          * we can handoff while keeping input variables intact */
          struct
          {
            /* input variables */
@@ -201,23 +200,28 @@ typedef struct
            {
              u16 owner_thread_index;
            };
-           /* output variables */
-           struct
+         };
+         /* output variables */
+         struct
+         {
+           union
            {
-             union
+             /* shallow virtual reassembly output variables */
+             struct
              {
-               /* shallow virtual reassembly output variables */
-               struct
-               {
-                 u8 ip_proto;  /* protocol in ip header */
-                 u16 l4_src_port;      /* tcp/udp/icmp src port */
-                 u16 l4_dst_port;      /* tcp/udp/icmp dst port */
-               };
-               /* full reassembly output variables */
-               struct
-               {
-                 u16 estimated_mtu;    /* estimated MTU calculated during reassembly */
-               };
+               u8 ip_proto;    /* protocol in ip header */
+               u8 icmp_type_or_tcp_flags;
+               u8 is_non_first_fragment;
+               u8 save_rewrite_length;
+               u16 l4_src_port;        /* tcp/udp/icmp src port */
+               u16 l4_dst_port;        /* tcp/udp/icmp dst port */
+               u32 tcp_ack_number;
+               u32 tcp_seq_number;
+             };
+             /* full reassembly output variables */
+             struct
+             {
+               u16 estimated_mtu;      /* estimated MTU calculated during reassembly */
              };
            };
          };
@@ -384,7 +388,10 @@ typedef struct
 
 STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length)
               == STRUCT_SIZE_OF (vnet_buffer_opaque_t,
-                                 mpls.save_rewrite_length)
+                                 ip.reass.save_rewrite_length)
+              && STRUCT_SIZE_OF (vnet_buffer_opaque_t,
+                                 ip.reass.save_rewrite_length) ==
+              STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length)
               && STRUCT_SIZE_OF (vnet_buffer_opaque_t,
                                  mpls.save_rewrite_length) == 1
               && VNET_REWRITE_TOTAL_BYTES < UINT8_MAX,
index 79cf22c..1d3607e 100644 (file)
@@ -196,13 +196,13 @@ typedef union
 
 #define IP4_ROUTER_ALERT_OPTION 20
 
-always_inline int
+always_inline u16
 ip4_get_fragment_offset (const ip4_header_t * i)
 {
   return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff;
 }
 
-always_inline int
+always_inline u16
 ip4_get_fragment_more (const ip4_header_t * i)
 {
   return clib_net_to_host_u16 (i->flags_and_fragment_offset) &
index b1905e4..21538a9 100644 (file)
@@ -25,8 +25,8 @@
 /**
  * IPv4 to IPv6 set call back function type
  */
-typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6,
-                                   void *ctx);
+typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4,
+                                   ip6_header_t * ip6, void *ctx);
 
 /* *INDENT-OFF* */
 static u8 icmp_to_icmp6_updater_pointer_table[] =
@@ -261,7 +261,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
                               sizeof (*inner_frag));
          ip6 = vlib_buffer_get_current (p);
          memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
-                   20 + 8);
+                  20 + 8);
          ip4 =
            (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
          icmp = (icmp46_header_t *) (ip4 + 1);
@@ -287,7 +287,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
          vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4)));
          ip6 = vlib_buffer_get_current (p);
          memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
-                   20 + 8);
+                  20 + 8);
          ip4 =
            (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
          icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4));
@@ -340,7 +340,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
       inner_ip6->hop_limit = inner_ip4->ttl;
       inner_ip6->protocol = inner_ip4->protocol;
 
-      if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0)
+      if ((rv = inner_fn (p, inner_ip4, inner_ip6, inner_ctx)) != 0)
        return rv;
 
       if (PREDICT_FALSE (inner_frag != NULL))
@@ -411,7 +411,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
   ip6->hop_limit = ip4->ttl;
   ip6->protocol = IP_PROTOCOL_ICMP6;
 
-  if ((rv = fn (ip4, ip6, ctx)) != 0)
+  if ((rv = fn (p, ip4, ip6, ctx)) != 0)
     return rv;
 
   //Truncate when the packet exceeds the minimal IPv6 MTU
@@ -437,217 +437,4 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
   return 0;
 }
 
-/**
- * @brief Translate IPv4 fragmented packet to IPv6.
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
-{
-  ip4_header_t *ip4;
-  ip6_header_t *ip6;
-  ip6_frag_hdr_t *frag;
-  int rv;
-
-  ip4 = vlib_buffer_get_current (p);
-  frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
-  ip6 =
-    (ip6_header_t *) u8_ptr_add (ip4,
-                                sizeof (*ip4) - sizeof (*frag) -
-                                sizeof (*ip6));
-  vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
-
-  //We know that the protocol was one of ICMP, TCP or UDP
-  //because the first fragment was found and cached
-  frag->next_hdr =
-    (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol;
-  frag->identification = frag_id_4to6 (ip4->fragment_id);
-  frag->rsv = 0;
-  frag->fragment_offset_and_more =
-    ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4),
-                                 clib_net_to_host_u16
-                                 (ip4->flags_and_fragment_offset) &
-                                 IP4_HEADER_FLAG_MORE_FRAGMENTS);
-
-  ip6->ip_version_traffic_class_and_flow_label =
-    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
-  ip6->payload_length =
-    clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
-                         sizeof (*ip4) + sizeof (*frag));
-  ip6->hop_limit = ip4->ttl;
-  ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
-
-  if ((rv = fn (ip4, ip6, ctx)) != 0)
-    return rv;
-
-  return 0;
-}
-
-/**
- * @brief Translate IPv4 UDP/TCP packet to IPv6.
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
-{
-  ip4_header_t *ip4;
-  ip6_header_t *ip6;
-  ip_csum_t csum;
-  u16 *checksum;
-  ip6_frag_hdr_t *frag;
-  u32 frag_id;
-  int rv;
-  ip4_address_t old_src, old_dst;
-
-  ip4 = vlib_buffer_get_current (p);
-
-  if (ip4->protocol == IP_PROTOCOL_UDP)
-    {
-      udp_header_t *udp = ip4_next_header (ip4);
-      checksum = &udp->checksum;
-
-      //UDP checksum is optional over IPv4 but mandatory for IPv6
-      //We do not check udp->length sanity but use our safe computed value instead
-      if (PREDICT_FALSE (!*checksum))
-       {
-         u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
-         csum = ip_incremental_checksum (0, udp, udp_len);
-         csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
-         csum =
-           ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP));
-         csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
-         *checksum = ~ip_csum_fold (csum);
-       }
-    }
-  else
-    {
-      tcp_header_t *tcp = ip4_next_header (ip4);
-      checksum = &tcp->checksum;
-    }
-
-  old_src.as_u32 = ip4->src_address.as_u32;
-  old_dst.as_u32 = ip4->dst_address.as_u32;
-
-  // Deal with fragmented packets
-  if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
-                    clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
-    {
-      ip6 =
-       (ip6_header_t *) u8_ptr_add (ip4,
-                                    sizeof (*ip4) - sizeof (*ip6) -
-                                    sizeof (*frag));
-      frag =
-       (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
-      frag_id = frag_id_4to6 (ip4->fragment_id);
-      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
-    }
-  else
-    {
-      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
-      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
-      frag = NULL;
-    }
-
-  ip6->ip_version_traffic_class_and_flow_label =
-    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
-  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
-  ip6->hop_limit = ip4->ttl;
-  ip6->protocol = ip4->protocol;
-
-  if (PREDICT_FALSE (frag != NULL))
-    {
-      frag->next_hdr = ip6->protocol;
-      frag->identification = frag_id;
-      frag->rsv = 0;
-      frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
-      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
-      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
-    }
-
-  if ((rv = fn (ip4, ip6, ctx)) != 0)
-    return rv;
-
-  csum = ip_csum_sub_even (*checksum, old_src.as_u32);
-  csum = ip_csum_sub_even (csum, old_dst.as_u32);
-  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
-  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
-  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
-  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
-  *checksum = ip_csum_fold (csum);
-
-  return 0;
-}
-
-/**
- * @brief Translate IPv4 packet to IPv6 (IP header only).
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
-{
-  ip4_header_t *ip4;
-  ip6_header_t *ip6;
-  ip6_frag_hdr_t *frag;
-  u32 frag_id;
-  int rv;
-
-  ip4 = vlib_buffer_get_current (p);
-
-  // Deal with fragmented packets
-  if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
-                    clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
-    {
-      ip6 =
-       (ip6_header_t *) u8_ptr_add (ip4,
-                                    sizeof (*ip4) - sizeof (*ip6) -
-                                    sizeof (*frag));
-      frag =
-       (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
-      frag_id = frag_id_4to6 (ip4->fragment_id);
-      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
-    }
-  else
-    {
-      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
-      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
-      frag = NULL;
-    }
-
-  ip6->ip_version_traffic_class_and_flow_label =
-    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
-  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
-  ip6->hop_limit = ip4->ttl;
-  ip6->protocol = ip4->protocol;
-
-  if (PREDICT_FALSE (frag != NULL))
-    {
-      frag->next_hdr = ip6->protocol;
-      frag->identification = frag_id;
-      frag->rsv = 0;
-      frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
-      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
-      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
-    }
-
-  if ((rv = fn (ip4, ip6, ctx)) != 0)
-    return rv;
-
-  return 0;
-}
-
 #endif /* __included_ip4_to_ip6_h__ */
index 2ba55b7..e823214 100644 (file)
@@ -579,6 +579,67 @@ ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b,
   return result;
 }
 
+/*
+ * Walk the extension headers that follow ip6_header, summing their lengths
+ * and recording the last header seen and the last one of type find_hdr.
+ *
+ * @param vm vlib main, passed to vlib_object_within_buffer_data()
+ * @param b buffer every walked header is checked against
+ * @param ip6_header ipv6 header, extension headers start right behind it
+ * @param find_hdr extension header type to look for
+ * @param length[out] sum of ip6_ext_header_len() over all walked headers
+ * @param ext_hdr[out] header of type find_hdr or NULL, must not be NULL itself
+ * @param last_ext_hdr[out] last extension header, NULL if there is none
+ *
+ * @return 0 on success, -1 when a header fails the within-buffer check
+ */
+always_inline int
+ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b,
+                 const ip6_header_t * ip6_header, u8 find_hdr, u32 * length,
+                 ip6_ext_header_t ** ext_hdr,
+                 ip6_ext_header_t ** last_ext_hdr)
+{
+  if (!ip6_ext_hdr (ip6_header->protocol))
+    { /* no extension headers at all: zero length, both outputs NULL */
+      *length = 0;
+      *ext_hdr = NULL;
+      *last_ext_hdr = NULL;
+      return 0;
+    }
+  *length = 0;
+  ip6_ext_header_t *h = (void *) (ip6_header + 1);
+  if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
+    { /* NOTE(review): *ext_hdr and *last_ext_hdr are not written here */
+      return -1;
+    }
+  *length += ip6_ext_header_len (h);
+  *last_ext_hdr = h;
+  *ext_hdr = NULL;
+  if (ip6_header->protocol == find_hdr)
+    { /* the first extension header is already the requested type */
+      *ext_hdr = h;
+    }
+  while (ip6_ext_hdr (h->next_hdr))
+    { /* advance while the current header announces another one */
+      if (h->next_hdr == find_hdr)
+       {
+         h = ip6_ext_next_header (h);
+         *ext_hdr = h;
+       }
+      else
+       {
+         h = ip6_ext_next_header (h);
+       }
+      if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
+       { /* *ext_hdr may already point at this rejected header */
+         return -1;
+       }
+      *length += ip6_ext_header_len (h);
+      *last_ext_hdr = h;
+    }
+  return 0;
+}
+
 /* *INDENT-OFF* */
 typedef CLIB_PACKED (struct {
   u8 next_hdr;
index d13a0c1..17a11e6 100644 (file)
 /**
  * IPv6 to IPv4 set call back function type
  */
-typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4,
-                                   void *ctx);
+typedef int (*ip6_to_ip4_icmp_set_fn_t) (ip6_header_t * ip6,
+                                        ip4_header_t * ip4, void *ctx);
+
+typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b,
+                                           ip6_header_t * ip6,
+                                           ip4_header_t * ip4, void *ctx);
 
 /* *INDENT-OFF* */
 static u8 icmp6_to_icmp_updater_pointer_table[] =
@@ -47,6 +51,8 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
 /**
  * @brief Parse some useful information from IPv6 header.
  *
+ * @param vm              vlib main
+ * @param b               vlib buffer
  * @param ip6             IPv6 header.
  * @param buff_len        Buffer length.
  * @param l4_protocol     L4 protocol number.
@@ -56,21 +62,37 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
  * @returns 0 on success, non-zero value otherwise.
  */
 static_always_inline int
-ip6_parse (const ip6_header_t * ip6, u32 buff_len,
-          u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset)
+ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6,
+          u32 buff_len, u8 * l4_protocol, u16 * l4_offset,
+          u16 * frag_hdr_offset)
 {
-  if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+  ip6_ext_header_t *last_hdr, *frag_hdr;
+  u32 length;
+  if (ip6_walk_ext_hdr
+      (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr,
+       &last_hdr))
     {
-      *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr;
-      *frag_hdr_offset = sizeof (*ip6);
-      *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t);
+      return -1;
+    }
+
+  if (length > 0)
+    {
+      if (frag_hdr)
+       {
+         *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6;
+       }
+      else
+       {
+         *frag_hdr_offset = 0;
+       }
+      *l4_protocol = last_hdr->next_hdr;
     }
   else
     {
-      *l4_protocol = ip6->protocol;
       *frag_hdr_offset = 0;
-      *l4_offset = sizeof (*ip6);
+      *l4_protocol = ip6->protocol;
     }
+  *l4_offset = sizeof (*ip6) + length;
 
   return (buff_len < (*l4_offset + 4)) ||
     (clib_net_to_host_u16 (ip6->payload_length) <
@@ -78,23 +100,32 @@ ip6_parse (const ip6_header_t * ip6, u32 buff_len,
 }
 
 /**
- * @brief Get TCP/UDP port number or ICMP id from IPv6 packet.
+ * @brief Get L4 information like port number or ICMP id from IPv6 packet.
  *
  * @param ip6        IPv6 header.
- * @param sender     1 get sender port, 0 get receiver port.
  * @param buffer_len Buffer length.
+ * @param ip_protocol L4 protocol
+ * @param src_port L4 src port or icmp id
+ * @param dst_port L4 dst port or icmp id
+ * @param icmp_type_or_tcp_flags ICMP type or TCP flags, if applicable
+ * @param tcp_ack_number TCP ack number, if applicable
+ * @param tcp_seq_number TCP seq number, if applicable
  *
- * @returns Port number on success, 0 otherwise.
+ * @returns 1 on success, 0 otherwise.
  */
 always_inline u16
-ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len)
+ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
+             u16 buffer_len, u8 * ip_protocol, u16 * src_port,
+             u16 * dst_port, u8 * icmp_type_or_tcp_flags,
+             u32 * tcp_ack_number, u32 * tcp_seq_number)
 {
   u8 l4_protocol;
   u16 l4_offset;
   u16 frag_offset;
   u8 *l4;
 
-  if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
+  if (ip6_parse
+      (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
     return 0;
 
   if (frag_offset &&
@@ -102,26 +133,45 @@ ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len)
                            u8_ptr_add (ip6, frag_offset))))
     return 0;                  //Can't deal with non-first fragment for now
 
+  if (ip_protocol)
+    {
+      *ip_protocol = l4_protocol;
+    }
   l4 = u8_ptr_add (ip6, l4_offset);
   if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP)
     {
-      return (sender) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t
-                                                             *)
-                                                            (l4))->dst_port;
+      if (src_port)
+       *src_port = ((udp_header_t *) (l4))->src_port;
+      if (dst_port)
+       *dst_port = ((udp_header_t *) (l4))->dst_port;
+      if (icmp_type_or_tcp_flags && l4_protocol == IP_PROTOCOL_TCP)
+       *icmp_type_or_tcp_flags = ((tcp_header_t *) (l4))->flags;
+      if (tcp_ack_number && l4_protocol == IP_PROTOCOL_TCP)
+       *tcp_ack_number = ((tcp_header_t *) (l4))->ack_number;
+      if (tcp_seq_number && l4_protocol == IP_PROTOCOL_TCP)
+       *tcp_seq_number = ((tcp_header_t *) (l4))->seq_number;
     }
   else if (l4_protocol == IP_PROTOCOL_ICMP6)
     {
       icmp46_header_t *icmp = (icmp46_header_t *) (l4);
+      if (icmp_type_or_tcp_flags)
+       *icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type;
       if (icmp->type == ICMP6_echo_request)
        {
-         return (sender) ? ((u16 *) (icmp))[2] : -1;
+         if (src_port)
+           *src_port = ((u16 *) (icmp))[2];
+         if (dst_port)
+           *dst_port = ((u16 *) (icmp))[2];
        }
       else if (icmp->type == ICMP6_echo_reply)
        {
-         return (sender) ? -1 : ((u16 *) (icmp))[2];
+         if (src_port)
+           *src_port = ((u16 *) (icmp))[2];
+         if (dst_port)
+           *dst_port = ((u16 *) (icmp))[2];
        }
     }
-  return 0;
+  return 1;
 }
 
 /**
@@ -223,14 +273,14 @@ icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6)
 /**
  * @brief Translate TOS value from IPv6 to IPv4.
  *
- * @param ip6 IPv6 header.
+ * @param ip_version_traffic_class_and_flow_label in network byte order
  *
  * @returns IPv4 TOS value.
  */
 static_always_inline u8
-ip6_translate_tos (const ip6_header_t * ip6)
+ip6_translate_tos (u32 ip_version_traffic_class_and_flow_label)
 {
-  return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label)
+  return (clib_net_to_host_u32 (ip_version_traffic_class_and_flow_label)
          & 0x0ff00000) >> 20;
 }
 
@@ -246,8 +296,9 @@ ip6_translate_tos (const ip6_header_t * ip6)
  * @returns 0 on success, non-zero value otherwise.
  */
 always_inline int
-icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
-              ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx)
+icmp6_to_icmp (vlib_main_t * vm, vlib_buffer_t * p,
+              ip6_to_ip4_icmp_set_fn_t fn, void *ctx,
+              ip6_to_ip4_icmp_set_fn_t inner_fn, void *inner_ctx)
 {
   ip6_header_t *ip6, *inner_ip6;
   ip4_header_t *ip4, *inner_ip4;
@@ -285,7 +336,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
       //    TO
       //                               [ IPv4][IC][ IPv4][L4 header ...
 
-      if (ip6_parse (inner_ip6, ip6_pay_len - 8,
+      if (ip6_parse (vm, p, inner_ip6, ip6_pay_len - 8,
                     &inner_protocol, &inner_l4_offset, &inner_frag_offset))
        return -1;
 
@@ -336,7 +387,9 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
 
       inner_ip4->ip_version_and_header_length =
        IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-      inner_ip4->tos = ip6_translate_tos (inner_ip6);
+      inner_ip4->tos =
+       ip6_translate_tos
+       (inner_ip6->ip_version_traffic_class_and_flow_label);
       inner_ip4->length =
        u16_net_add (inner_ip6->payload_length,
                     sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset);
@@ -389,7 +442,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
 
   ip4->ip_version_and_header_length =
     IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
+  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
   ip4->fragment_id = 0;
   ip4->flags_and_fragment_offset = 0;
   ip4->ttl = ip6->hop_limit;
@@ -412,220 +465,6 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
   return 0;
 }
 
-/**
- * @brief Translate IPv6 fragmented packet to IPv4.
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
-{
-  ip6_header_t *ip6;
-  ip6_frag_hdr_t *frag;
-  ip4_header_t *ip4;
-  u16 frag_id;
-  u8 frag_more;
-  u16 frag_offset;
-  u8 l4_protocol;
-  u16 l4_offset;
-  int rv;
-
-  ip6 = vlib_buffer_get_current (p);
-
-  if (ip6_parse
-      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
-    return -1;
-
-  frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
-  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
-  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
-
-  frag_id = frag_id_6to4 (frag->identification);
-  frag_more = ip6_frag_hdr_more (frag);
-  frag_offset = ip6_frag_hdr_offset (frag);
-
-  if ((rv = fn (ip6, ip4, ctx)) != 0)
-    return rv;
-
-  ip4->ip_version_and_header_length =
-    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
-  ip4->length = u16_net_add (ip6->payload_length,
-                            sizeof (*ip4) - l4_offset + sizeof (*ip6));
-  ip4->fragment_id = frag_id;
-  ip4->flags_and_fragment_offset =
-    clib_host_to_net_u16 (frag_offset |
-                         (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
-  ip4->ttl = ip6->hop_limit;
-  ip4->protocol =
-    (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol;
-  ip4->checksum = ip4_header_checksum (ip4);
-
-  return 0;
-}
-
-/**
- * @brief Translate IPv6 UDP/TCP packet to IPv4.
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
-                   u8 udp_checksum)
-{
-  ip6_header_t *ip6;
-  u16 *checksum;
-  ip_csum_t csum = 0;
-  ip4_header_t *ip4;
-  u16 fragment_id;
-  u16 flags;
-  u16 frag_offset;
-  u8 l4_protocol;
-  u16 l4_offset;
-  int rv;
-  ip6_address_t old_src, old_dst;
-
-  ip6 = vlib_buffer_get_current (p);
-
-  if (ip6_parse
-      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
-    return -1;
-
-  if (l4_protocol == IP_PROTOCOL_TCP)
-    {
-      tcp_header_t *tcp = ip6_next_header (ip6);
-      checksum = &tcp->checksum;
-    }
-  else
-    {
-      udp_header_t *udp = ip6_next_header (ip6);
-      checksum = &udp->checksum;
-    }
-
-  old_src.as_u64[0] = ip6->src_address.as_u64[0];
-  old_src.as_u64[1] = ip6->src_address.as_u64[1];
-  old_dst.as_u64[0] = ip6->dst_address.as_u64[0];
-  old_dst.as_u64[1] = ip6->dst_address.as_u64[1];
-
-  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
-
-  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
-
-  if (PREDICT_FALSE (frag_offset))
-    {
-      //Only the first fragment
-      ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
-      fragment_id = frag_id_6to4 (hdr->identification);
-      flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
-    }
-  else
-    {
-      fragment_id = 0;
-      flags = 0;
-    }
-
-  if ((rv = fn (ip6, ip4, ctx)) != 0)
-    return rv;
-
-  ip4->ip_version_and_header_length =
-    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
-  ip4->length = u16_net_add (ip6->payload_length,
-                            sizeof (*ip4) + sizeof (*ip6) - l4_offset);
-  ip4->fragment_id = fragment_id;
-  ip4->flags_and_fragment_offset = flags;
-  ip4->ttl = ip6->hop_limit;
-  ip4->protocol = l4_protocol;
-  ip4->checksum = ip4_header_checksum (ip4);
-
-  //UDP checksum is optional over IPv4
-  if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP)
-    {
-      *checksum = 0;
-    }
-  else
-    {
-      csum = ip_csum_sub_even (*checksum, old_src.as_u64[0]);
-      csum = ip_csum_sub_even (csum, old_src.as_u64[1]);
-      csum = ip_csum_sub_even (csum, old_dst.as_u64[0]);
-      csum = ip_csum_sub_even (csum, old_dst.as_u64[1]);
-      csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
-      csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
-      *checksum = ip_csum_fold (csum);
-    }
-
-  return 0;
-}
-
-/**
- * @brief Translate IPv6 packet to IPv4 (IP header only).
- *
- * @param p   Buffer to translate.
- * @param fn  The function to translate header.
- * @param ctx A context passed in the header translate function.
- *
- * @returns 0 on success, non-zero value otherwise.
- */
-always_inline int
-ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
-{
-  ip6_header_t *ip6;
-  ip4_header_t *ip4;
-  u16 fragment_id;
-  u16 flags;
-  u16 frag_offset;
-  u8 l4_protocol;
-  u16 l4_offset;
-  int rv;
-
-  ip6 = vlib_buffer_get_current (p);
-
-  if (ip6_parse
-      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
-    return -1;
-
-  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
-
-  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
-
-  if (PREDICT_FALSE (frag_offset))
-    {
-      //Only the first fragment
-      ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
-      fragment_id = frag_id_6to4 (hdr->identification);
-      flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
-    }
-  else
-    {
-      fragment_id = 0;
-      flags = 0;
-    }
-
-  if ((rv = fn (ip6, ip4, ctx)) != 0)
-    return rv;
-
-  ip4->ip_version_and_header_length =
-    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
-  ip4->tos = ip6_translate_tos (ip6);
-  ip4->length = u16_net_add (ip6->payload_length,
-                            sizeof (*ip4) + sizeof (*ip6) - l4_offset);
-  ip4->fragment_id = fragment_id;
-  ip4->flags_and_fragment_offset = flags;
-  ip4->ttl = ip6->hop_limit;
-  ip4->protocol = l4_protocol;
-  ip4->checksum = ip4_header_checksum (ip4);
-
-  return 0;
-}
-
 #endif /* __included_ip6_to_ip4_h__ */
 
 /*
index d713062..b94e9b2 100644 (file)
@@ -98,6 +98,9 @@ typedef struct
   bool is_complete;
   // ip protocol
   u8 ip_proto;
+  u8 icmp_type_or_tcp_flags;
+  u32 tcp_ack_number;
+  u32 tcp_seq_number;
   // l4 src port
   u16 l4_src_port;
   // l4 dst port
@@ -151,6 +154,9 @@ typedef struct
   // reference count for enabling/disabling feature - per interface
   u32 *feature_use_refcount_per_intf;
 
+  // reference count for enabling/disabling output feature - per interface
+  u32 *output_feature_use_refcount_per_intf;
+
 } ip4_sv_reass_main_t;
 
 extern ip4_sv_reass_main_t ip4_sv_reass_main;
@@ -172,6 +178,7 @@ typedef enum
   REASS_FRAGMENT_CACHE,
   REASS_FINISH,
   REASS_FRAGMENT_FORWARD,
+  REASS_PASSTHROUGH,
 } ip4_sv_reass_trace_operation_e;
 
 typedef struct
@@ -193,7 +200,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
-  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
+  if (REASS_PASSTHROUGH != t->action)
+    {
+      s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
+    }
   switch (t->action)
     {
     case REASS_FRAGMENT_CACHE:
@@ -211,6 +221,9 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
                t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
                clib_net_to_host_u16 (t->l4_dst_port));
       break;
+    case REASS_PASSTHROUGH:
+      s = format (s, "[not-fragmented]");
+      break;
     }
   return s;
 }
@@ -223,13 +236,16 @@ ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
   ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
-  t->reass_id = reass->id;
+  if (reass)
+    {
+      t->reass_id = reass->id;
+      t->op_id = reass->trace_op_counter;
+      ++reass->trace_op_counter;
+    }
   t->action = action;
-  t->op_id = reass->trace_op_counter;
   t->ip_proto = ip_proto;
   t->l4_src_port = l4_src_port;
   t->l4_dst_port = l4_dst_port;
-  ++reass->trace_op_counter;
 #if 0
   static u8 *s = NULL;
   s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
@@ -358,19 +374,29 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
 always_inline ip4_sv_reass_rc_t
 ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
                     ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
-                    ip4_sv_reass_t * reass, u32 bi0)
+                    ip4_header_t * ip0, ip4_sv_reass_t * reass, u32 bi0)
 {
-  vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
   ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
-  ip4_header_t *fip = vlib_buffer_get_current (fb);
-  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
+  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
   if (0 == fragment_first)
     {
-      reass->ip_proto = fip->protocol;
-      reass->l4_src_port = ip4_get_port (fip, 1);
-      reass->l4_dst_port = ip4_get_port (fip, 0);
+      reass->ip_proto = ip0->protocol;
+      reass->l4_src_port = ip4_get_port (ip0, 1);
+      reass->l4_dst_port = ip4_get_port (ip0, 0);
       if (!reass->l4_src_port || !reass->l4_dst_port)
        return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
+      if (IP_PROTOCOL_TCP == reass->ip_proto)
+       {
+         reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags;
+         reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number;
+         reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number;
+       }
+      else if (IP_PROTOCOL_ICMP == reass->ip_proto)
+       {
+         reass->icmp_type_or_tcp_flags =
+           ((icmp46_header_t *) (ip0 + 1))->type;
+       }
       reass->is_complete = true;
       vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -383,7 +409,7 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
   vec_add1 (reass->cached_buffers, bi0);
   if (!reass->is_complete)
     {
-      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        {
          ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
                                  REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
@@ -398,7 +424,8 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
 
 always_inline uword
 ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
-                    vlib_frame_t * frame, bool is_feature, bool is_custom)
+                    vlib_frame_t * frame, bool is_feature,
+                    bool is_output_feature, bool is_custom)
 {
   u32 *from = vlib_frame_vector_args (frame);
   u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -423,14 +450,52 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          bi0 = from[0];
          b0 = vlib_get_buffer (vm, bi0);
 
-         ip4_header_t *ip0 = vlib_buffer_get_current (b0);
+         ip4_header_t *ip0 =
+           (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
+                                        is_output_feature *
+                                        vnet_buffer (b0)->
+                                        ip.save_rewrite_length);
          if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
            {
              // this is a regular packet - no fragmentation
+             if (is_custom)
+               {
+                 next0 = vnet_buffer (b0)->ip.reass.next_index;
+               }
+             else
+               {
+                 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
+               }
+             vnet_buffer (b0)->ip.reass.save_rewrite_length =
+               vnet_buffer (b0)->ip.save_rewrite_length;
+             vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
              vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
+             if (IP_PROTOCOL_TCP == ip0->protocol)
+               {
+                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+                   ((tcp_header_t *) (ip0 + 1))->flags;
+                 vnet_buffer (b0)->ip.reass.tcp_ack_number =
+                   ((tcp_header_t *) (ip0 + 1))->ack_number;
+                 vnet_buffer (b0)->ip.reass.tcp_seq_number =
+                   ((tcp_header_t *) (ip0 + 1))->seq_number;
+               }
+             else if (IP_PROTOCOL_ICMP == ip0->protocol)
+               {
+                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+                   ((icmp46_header_t *) (ip0 + 1))->type;
+               }
              vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
              vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
-             next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
+             if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+               {
+                 ip4_sv_reass_add_trace (vm, node, rm, NULL, bi0,
+                                         REASS_PASSTHROUGH,
+                                         vnet_buffer (b0)->ip.reass.ip_proto,
+                                         vnet_buffer (b0)->ip.
+                                         reass.l4_src_port,
+                                         vnet_buffer (b0)->ip.
+                                         reass.l4_dst_port);
+               }
              goto packet_enqueue;
            }
          const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
@@ -474,10 +539,27 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          if (reass->is_complete)
            {
+             if (is_custom)
+               {
+                 next0 = vnet_buffer (b0)->ip.reass.next_index;
+               }
+             else
+               {
+                 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
+               }
+             vnet_buffer (b0)->ip.reass.save_rewrite_length =
+               vnet_buffer (b0)->ip.save_rewrite_length;
+             vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+               ! !fragment_first;
              vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+             vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+               reass->icmp_type_or_tcp_flags;
+             vnet_buffer (b0)->ip.reass.tcp_ack_number =
+               reass->tcp_ack_number;
+             vnet_buffer (b0)->ip.reass.tcp_seq_number =
+               reass->tcp_seq_number;
              vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
              vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
-             next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
              error0 = IP4_ERROR_NONE;
              if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
                {
@@ -491,7 +573,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
 
          ip4_sv_reass_rc_t rc =
-           ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
+           ip4_sv_reass_update (vm, node, rm, rt, ip0, reass, bi0);
          switch (rc)
            {
            case IP4_SV_REASS_RC_OK:
@@ -538,7 +620,18 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                to_next[0] = bi0;
                to_next += 1;
                n_left_to_next -= 1;
+               ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14));
+               vnet_buffer (b0)->ip.reass.save_rewrite_length =
+                 vnet_buffer (b0)->ip.save_rewrite_length;
+               vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+                 ! !ip4_get_fragment_offset (vlib_buffer_get_current (b0));
                vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+                 reass->icmp_type_or_tcp_flags;
+               vnet_buffer (b0)->ip.reass.tcp_ack_number =
+                 reass->tcp_ack_number;
+               vnet_buffer (b0)->ip.reass.tcp_seq_number =
+                 reass->tcp_seq_number;
                vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
                vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
                if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -568,10 +661,6 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              b0 = vlib_get_buffer (vm, bi0);
              vnet_feature_next (&next0, b0);
            }
-         if (is_custom)
-           {
-             next0 = vnet_buffer (b0)->ip.reass.next_index;
-           }
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
@@ -599,6 +688,7 @@ VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
                                  vlib_frame_t * frame)
 {
   return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
+                             false /* is_output_feature */ ,
                              false /* is_custom */ );
 }
 
@@ -625,6 +715,7 @@ VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
                                          vlib_frame_t * frame)
 {
   return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
+                             false /* is_output_feature */ ,
                              false /* is_custom */ );
 }
 
@@ -654,6 +745,42 @@ VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
 };
 /* *INDENT-ON* */
 
+VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
+                                                vlib_node_runtime_t * node,
+                                                vlib_frame_t * frame)
+{ /* output-arc variant: feature mode with is_output_feature set */
+  return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
+                             true /* is_output_feature */ ,
+                             false /* is_custom */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
+    .name = "ip4-sv-reassembly-output-feature",
+    .vector_size = sizeof (u32),
+    .format_trace = format_ip4_sv_reass_trace,
+    .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
+    .error_strings = ip4_sv_reass_error_strings,
+    .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
+    .next_nodes =
+        { /* NOTE(review): HANDOFF names a "feature-hoff" node - confirm */
+                [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
+                [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
+                [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
+        },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
+    .arc_name = "ip4-output", /* arc the output reassembly node joins */
+    .node_name = "ip4-sv-reassembly-output-feature",
+    .runs_before = 0,
+    .runs_after = 0,
+};
+/* *INDENT-ON* */
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
     .name = "ip4-sv-reassembly-custom-next",
@@ -677,6 +804,7 @@ VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
                                         vlib_frame_t * frame)
 {
   return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
+                             false /* is_output_feature */ ,
                              true /* is_custom */ );
 }
 
@@ -824,6 +952,7 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
     vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
 
   rm->feature_use_refcount_per_intf = NULL;
+  rm->output_feature_use_refcount_per_intf = NULL;
 
   return error;
 }
@@ -1010,9 +1139,8 @@ VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
 vnet_api_error_t
 ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
 {
-  return vnet_feature_enable_disable ("ip4-unicast",
-                                     "ip4-sv-reassembly-feature",
-                                     sw_if_index, enable_disable, 0, 0);
+  return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index,
+                                                 enable_disable);
 }
 #endif /* CLIB_MARCH_VARIANT */
 
@@ -1177,7 +1305,8 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
     }
   else
     {
-      --rm->feature_use_refcount_per_intf[sw_if_index];
+      if (rm->feature_use_refcount_per_intf[sw_if_index])
+       --rm->feature_use_refcount_per_intf[sw_if_index];
       if (!rm->feature_use_refcount_per_intf[sw_if_index])
        return vnet_feature_enable_disable ("ip4-unicast",
                                            "ip4-sv-reassembly-feature",
@@ -1192,6 +1321,35 @@ ip4_sv_reass_custom_register_next_node (uword node_index)
   return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
                             node_index);
 }
+
+int
+ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
+                                               int is_enable)
+{ /* refcounted per-interface switch of the ip4-output sv-reass feature */
+  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
+  vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index);
+  if (is_enable)
+    {
+      if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
+       { /* first reference: turn the feature on for this interface */
+         ++rm->output_feature_use_refcount_per_intf[sw_if_index];
+         return vnet_feature_enable_disable ("ip4-output",
+                                             "ip4-sv-reassembly-output-feature",
+                                             sw_if_index, 1, 0, 0);
+       }
+      ++rm->output_feature_use_refcount_per_intf[sw_if_index];
+    }
+  else
+    { /* drop one reference, never going below zero */
+      if (rm->output_feature_use_refcount_per_intf[sw_if_index])
+       --rm->output_feature_use_refcount_per_intf[sw_if_index];
+      if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
+       return vnet_feature_enable_disable ("ip4-output",
+                                           "ip4-sv-reassembly-output-feature",
+                                           sw_if_index, 0, 0, 0);
+    }
+  return 0; /* still referenced: nothing was toggled */
+}
 #endif
 
 /*
index cf9f365..e926dbe 100644 (file)
@@ -45,6 +45,8 @@ vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index,
 
 
 int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
+int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
+                                                   int is_enable);
 
 uword ip4_sv_reass_custom_register_next_node (uword node_index);
 
index 0837f06..4426177 100644 (file)
@@ -96,6 +96,9 @@ typedef struct
   bool is_complete;
   // ip protocol
   u8 ip_proto;
+  u8 icmp_type_or_tcp_flags;
+  u32 tcp_ack_number;
+  u32 tcp_seq_number;
   // l4 src port
   u16 l4_src_port;
   // l4 dst port
@@ -170,6 +173,7 @@ typedef enum
   REASS_FRAGMENT_CACHE,
   REASS_FINISH,
   REASS_FRAGMENT_FORWARD,
+  REASS_PASSTHROUGH,
 } ip6_sv_reass_trace_operation_e;
 
 typedef struct
@@ -188,7 +192,10 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip6_sv_reass_trace_t *t = va_arg (*args, ip6_sv_reass_trace_t *);
-  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
+  if (REASS_PASSTHROUGH != t->action)
+    {
+      s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
+    }
   switch (t->action)
     {
     case REASS_FRAGMENT_CACHE:
@@ -206,6 +213,9 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
                t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
                clib_net_to_host_u16 (t->l4_dst_port));
       break;
+    case REASS_PASSTHROUGH:
+      s = format (s, "[not-fragmented]");
+      break;
     }
   return s;
 }
@@ -219,13 +229,16 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
   ip6_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
-  t->reass_id = reass->id;
+  if (reass)
+    {
+      t->reass_id = reass->id;
+      t->op_id = reass->trace_op_counter;
+      ++reass->trace_op_counter;
+    }
   t->action = action;
-  t->op_id = reass->trace_op_counter;
   t->ip_proto = ip_proto;
   t->l4_src_port = l4_src_port;
   t->l4_dst_port = l4_dst_port;
-  ++reass->trace_op_counter;
 #if 0
   static u8 *s = NULL;
   s = format (s, "%U", format_ip6_sv_reass_trace, NULL, NULL, t);
@@ -391,18 +404,13 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
   fvnb->ip.reass.next_range_bi = ~0;
   if (0 == fragment_first)
     {
-      ip6_ext_header_t *ext_hdr = (void *) frag_hdr;
-      while (ip6_ext_hdr (ext_hdr->next_hdr)
-            && vlib_object_within_buffer_data (vm, fb, ext_hdr,
-                                               ext_hdr->n_data_u64s * 8))
-       {
-         ext_hdr = ip6_ext_next_header (ext_hdr);
-       }
-      reass->ip_proto = ext_hdr->next_hdr;
-      reass->l4_src_port = ip6_get_port (fip, 1, fb->current_length);
-      reass->l4_dst_port = ip6_get_port (fip, 0, fb->current_length);
-      if (!reass->l4_src_port || !reass->l4_dst_port)
+      if (!ip6_get_port
+         (vm, fb, fip, fb->current_length, &reass->ip_proto,
+          &reass->l4_src_port, &reass->l4_dst_port,
+          &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number,
+          &reass->tcp_seq_number))
        return IP6_SV_REASS_RC_UNSUPP_IP_PROTO;
+
       reass->is_complete = true;
       vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -538,12 +546,34 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
          if (!frag_hdr)
            {
              // this is a regular packet - no fragmentation
-             vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
-             vnet_buffer (b0)->ip.reass.l4_src_port =
-               ip6_get_port (ip0, 1, b0->current_length);
-             vnet_buffer (b0)->ip.reass.l4_dst_port =
-               ip6_get_port (ip0, 0, b0->current_length);
+             if (!ip6_get_port
+                 (vm, b0, ip0, b0->current_length,
+                  &(vnet_buffer (b0)->ip.reass.ip_proto),
+                  &(vnet_buffer (b0)->ip.reass.l4_src_port),
+                  &(vnet_buffer (b0)->ip.reass.l4_dst_port),
+                  &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags),
+                  &(vnet_buffer (b0)->ip.reass.tcp_ack_number),
+                  &(vnet_buffer (b0)->ip.reass.tcp_seq_number)))
+               {
+                 error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
+                 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
+                 goto packet_enqueue;
+               }
+             ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14));
+             vnet_buffer (b0)->ip.reass.save_rewrite_length =
+               vnet_buffer (b0)->ip.save_rewrite_length;
+             vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
              next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+             if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+               {
+                 ip6_sv_reass_add_trace (vm, node, rm, NULL, bi0,
+                                         REASS_PASSTHROUGH,
+                                         vnet_buffer (b0)->ip.reass.ip_proto,
+                                         vnet_buffer (b0)->ip.
+                                         reass.l4_src_port,
+                                         vnet_buffer (b0)->ip.
+                                         reass.l4_dst_port);
+               }
              goto packet_enqueue;
            }
          vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
@@ -601,7 +631,18 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
 
          if (reass->is_complete)
            {
+             ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14));
+             vnet_buffer (b0)->ip.reass.save_rewrite_length =
+               vnet_buffer (b0)->ip.save_rewrite_length;
+             vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+               ! !ip6_frag_hdr_offset (frag_hdr);
              vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+             vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+               reass->icmp_type_or_tcp_flags;
+             vnet_buffer (b0)->ip.reass.tcp_ack_number =
+               reass->tcp_ack_number;
+             vnet_buffer (b0)->ip.reass.tcp_seq_number =
+               reass->tcp_seq_number;
              vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
              vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
              next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
@@ -668,7 +709,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
                  {
                    vnet_feature_next (&next0, b0);
                  }
+               frag_hdr =
+                 vlib_buffer_get_current (b0) +
+                 vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset;
+               ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14));
+               vnet_buffer (b0)->ip.reass.save_rewrite_length =
+                 vnet_buffer (b0)->ip.save_rewrite_length;
+               vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+                 ! !ip6_frag_hdr_offset (frag_hdr);
                vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+                 reass->icmp_type_or_tcp_flags;
+               vnet_buffer (b0)->ip.reass.tcp_ack_number =
+                 reass->tcp_ack_number;
+               vnet_buffer (b0)->ip.reass.tcp_seq_number =
+                 reass->tcp_seq_number;
                vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
                vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
                if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -1124,9 +1179,8 @@ VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
 vnet_api_error_t
 ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
 {
-  return vnet_feature_enable_disable ("ip6-unicast",
-                                     "ip6-sv-reassembly-feature",
-                                     sw_if_index, enable_disable, 0, 0);
+  return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index,
+                                                 enable_disable);
 }
 #endif /* CLIB_MARCH_VARIANT */
 
index d17f757..c422d54 100644 (file)
@@ -127,6 +127,7 @@ typedef struct
   u32 dev_instance;            /* Real device instance in tunnel vector */
   u32 user_instance;           /* Instance name being shown to user */
 
+
     VNET_DECLARE_REWRITE;
 } vxlan_gbp_tunnel_t;