NAT44: active-passive HA (VPP-1571) 80/17880/2
author Matus Fabian <matfabia@cisco.com>
Tue, 26 Feb 2019 17:05:23 +0000 (09:05 -0800)
committer Matus Fabian <matfabia@cisco.com>
Wed, 27 Feb 2019 08:56:32 +0000 (00:56 -0800)
session synchronization so that we can build a plain active-passive HA NAT pair

Change-Id: I21db200491081ca46b7af3e82afc677c1985abf4
Signed-off-by: Matus Fabian <matfabia@cisco.com>
16 files changed:
src/plugins/nat/CMakeLists.txt
src/plugins/nat/in2out.c
src/plugins/nat/in2out_ed.c
src/plugins/nat/nat.api
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat44_cli.c
src/plugins/nat/nat_api.c
src/plugins/nat/nat_ha.c [new file with mode: 0644]
src/plugins/nat/nat_ha.h [new file with mode: 0644]
src/plugins/nat/nat_ha_doc.md [new file with mode: 0644]
src/plugins/nat/nat_inlines.h
src/plugins/nat/out2in.c
src/plugins/nat/out2in_ed.c
test/test_nat.py
test/vpp_papi_provider.py

index f4a9919..4f6ed67 100644 (file)
@@ -48,6 +48,7 @@ add_vpp_plugin(nat
   nat_affinity.c
   nat_format.c
   nat_syslog.c
+  nat_ha.c
 
   MULTIARCH_SOURCES
   dslite_ce_decap.c
index 7ed375d..8bdb792 100755 (executable)
@@ -29,6 +29,7 @@
 #include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_syslog.h>
+#include <nat/nat_ha.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -224,6 +225,10 @@ nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
                               &s->out2in.addr, s->out2in.port,
                               s->in2out.protocol);
 
+      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+                  s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+                  ctx->thread_index);
+
       if (!snat_is_session_static (s))
        snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
                                            &s->out2in);
@@ -306,7 +311,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0,
       return SNAT_IN2OUT_NEXT_DROP;
     }
 
-  s = nat_session_alloc_or_recycle (sm, u, thread_index);
+  s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
   if (!s)
     {
       nat44_delete_user_with_no_session (sm, u, thread_index);
@@ -380,6 +385,12 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0,
                           &s->in2out.addr, s->in2out.port, &s->out2in.addr,
                           s->out2in.port, s->in2out.protocol);
 
+  nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
+              s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+              &s->ext_host_nat_addr, s->ext_host_nat_port,
+              s->in2out.protocol, s->in2out.fib_index, s->flags,
+              thread_index, 0);
+
   return next0;
 }
 
@@ -803,7 +814,7 @@ icmp_in2out_slow_path (snat_main_t * sm,
       /* Accounting */
       nat44_session_update_counters (s0, now,
                                     vlib_buffer_length_in_chain
-                                    (sm->vlib_main, b0));
+                                    (sm->vlib_main, b0), thread_index);
       /* Per-user LRU list maintenance */
       nat44_session_update_lru (sm, s0, thread_index);
     }
@@ -1091,8 +1102,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
        trace00:
@@ -1280,8 +1291,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s1, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b1));
+                                        vlib_buffer_length_in_chain (vm, b1),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s1, thread_index);
        trace01:
@@ -1504,8 +1515,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -1887,8 +1898,8 @@ VLIB_NODE_FN (nat44_in2out_reass_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
index 8479d27..3bcde10 100644 (file)
@@ -29,6 +29,7 @@
 #include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_syslog.h>
+#include <nat/nat_ha.h>
 
 #define foreach_nat_in2out_ed_error                     \
 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
@@ -209,6 +210,10 @@ nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
                             &s->ext_host_addr, s->ext_host_port,
                             s->in2out.protocol, is_twice_nat_session (s));
 
+      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+                  s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+                  ctx->thread_index);
+
       if (is_twice_nat_session (s))
        {
          for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
@@ -255,7 +260,7 @@ icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
       /* Accounting */
       nat44_session_update_counters (s0, now,
                                     vlib_buffer_length_in_chain
-                                    (sm->vlib_main, b0));
+                                    (sm->vlib_main, b0), thread_index);
       /* Per-user LRU list maintenance */
       nat44_session_update_lru (sm, s0, thread_index);
     }
@@ -431,6 +436,12 @@ slow_path_ed (snat_main_t * sm,
                         &s->ext_host_addr, s->ext_host_port,
                         s->in2out.protocol, 0);
 
+  nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
+              s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+              &s->ext_host_nat_addr, s->ext_host_nat_port,
+              s->in2out.protocol, s->in2out.fib_index, s->flags,
+              thread_index, 0);
+
   return next;
 }
 
@@ -517,7 +528,8 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
            }
          /* Accounting */
          nat44_session_update_counters (s, now,
-                                        vlib_buffer_length_in_chain (vm, b));
+                                        vlib_buffer_length_in_chain (vm, b),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s, thread_index);
          return 1;
@@ -552,7 +564,7 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip,
        {
          nat_log_debug ("TCP close connection %U", format_snat_session,
                         &sm->per_thread_data[thread_index], s);
-         nat_free_session_data (sm, s, thread_index);
+         nat_free_session_data (sm, s, thread_index, 0);
          nat44_delete_session (sm, s, thread_index);
        }
       else
@@ -881,7 +893,8 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm,
   ip->checksum = ip_csum_fold (sum);
 
   /* Accounting */
-  nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
+  nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
+                                thread_index);
   /* Per-user LRU list maintenance */
   nat44_session_update_lru (sm, s, thread_index);
 
@@ -1153,7 +1166,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
          /* Accounting */
          nat44_session_update_counters (s0, now,
                                         vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                                                     b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -1361,8 +1375,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s1, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b1));
+                                        vlib_buffer_length_in_chain (vm, b1),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s1, thread_index);
 
@@ -1599,8 +1613,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -2024,8 +2038,8 @@ nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
index f41428b..f09d560 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 option version = "4.1.0";
+import "vnet/ip/ip_types.api";
 
 /**
  * @file nat.api
@@ -367,6 +368,127 @@ define nat_get_mss_clamping_reply {
   u8 enable;
 };
 
+/** \brief Set HA listener (local settings)
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param ip_address - local IP4 address
+    @param port - local UDP port number
+    @param path_mtu - path MTU between local and failover
+*/
+autoreply define nat_ha_set_listener {
+  u32 client_index;
+  u32 context;
+  vl_api_ip4_address_t ip_address;
+  u16 port;
+  u32 path_mtu;
+};
+
+/** \brief Set HA failover (remote settings)
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param ip_address - failover IP4 address
+    @param port - failover UDP port number
+    @param session_refresh_interval - number of seconds after which to send
+                                      session counters refresh
+*/
+autoreply define nat_ha_set_failover {
+  u32 client_index;
+  u32 context;
+  vl_api_ip4_address_t ip_address;
+  u16 port;
+  u32 session_refresh_interval;
+};
+
+/** \brief Get HA listener/local configuration
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_ha_get_listener {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Get HA listener/local configuration reply
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param ip_address - local IP4 address
+    @param port - local UDP port number
+    @param path_mtu - Path MTU between local and failover
+*/
+define nat_ha_get_listener_reply {
+  u32 context;
+  i32 retval;
+  vl_api_ip4_address_t ip_address;
+  u16 port;
+  u32 path_mtu;
+};
+
+/** \brief Get HA failover/remote settings
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define nat_ha_get_failover {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Get HA failover/remote settings reply
+    @param context - sender context, to match reply w/ request
+    @param retval - return code
+    @param ip_address - failover IP4 address
+    @param port - failover UDP port number
+    @param session_refresh_interval - number of seconds after which to send
+                                      session counters refresh
+*/
+define nat_ha_get_failover_reply {
+  u32 context;
+  i32 retval;
+  vl_api_ip4_address_t ip_address;
+  u16 port;
+  u32 session_refresh_interval;
+};
+
+/** \brief Flush the current HA data (for testing)
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+autoreply define nat_ha_flush {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Resync HA (resend existing sessions to new failover)
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param want_resync_event - resync completed event sent to the sender via
+                               nat_ha_resync_completed_event API message if
+                               non-zero
+    @param pid - sender's pid
+*/
+autoreply define nat_ha_resync
+{
+  u32 client_index;
+  u32 context;
+  u8 want_resync_event;
+  u32 pid;
+};
+
+/** \brief Tell client about a HA resync completion event
+    @param client_index - opaque cookie to identify the sender
+    @param pid - client pid registered to receive notification
+    @param missed_count - number of missed (not ACKed) messages
+*/
+define nat_ha_resync_completed_event
+{
+  u32 client_index;
+  u32 pid;
+  u32 missed_count;
+};
+
+service {
+  rpc nat_ha_resync returns nat_ha_resync_reply events nat_ha_resync_completed_event;
+};
+
 /*
  * NAT44 APIs
  */
index 79c1eaa..3856add 100755 (executable)
@@ -30,6 +30,7 @@
 #include <nat/nat_inlines.h>
 #include <nat/nat_affinity.h>
 #include <nat/nat_syslog.h>
+#include <nat/nat_ha.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -175,7 +176,8 @@ VLIB_PLUGIN_REGISTER () = {
 /* *INDENT-ON* */
 
 void
-nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
+nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
+                      u8 is_ha)
 {
   snat_session_key_t key;
   clib_bihash_kv_8_8_t kv;
@@ -238,12 +240,13 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
        nat_log_warn ("in2out_ed key del failed");
 
-      nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
-                            &s->in2out.addr, s->in2out.port,
-                            &s->ext_host_nat_addr, s->ext_host_nat_port,
-                            &s->out2in.addr, s->out2in.port,
-                            &s->ext_host_addr, s->ext_host_port,
-                            s->in2out.protocol, is_twice_nat_session (s));
+      if (!is_ha)
+       nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
+                              &s->in2out.addr, s->in2out.port,
+                              &s->ext_host_nat_addr, s->ext_host_nat_port,
+                              &s->out2in.addr, s->out2in.port,
+                              &s->ext_host_addr, s->ext_host_port,
+                              s->in2out.protocol, is_twice_nat_session (s));
     }
   else
     {
@@ -254,22 +257,31 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
       if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0))
        nat_log_warn ("out2in key del failed");
 
-      nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
-                              &s->in2out.addr, s->in2out.port,
-                              &s->out2in.addr, s->out2in.port,
-                              s->in2out.protocol);
+      if (!is_ha)
+       nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
+                                &s->in2out.addr, s->in2out.port,
+                                &s->out2in.addr, s->out2in.port,
+                                s->in2out.protocol);
     }
 
   if (snat_is_unk_proto_session (s))
     return;
 
-  /* log NAT event */
-  snat_ipfix_logging_nat44_ses_delete (thread_index,
-                                      s->in2out.addr.as_u32,
-                                      s->out2in.addr.as_u32,
-                                      s->in2out.protocol,
-                                      s->in2out.port,
-                                      s->out2in.port, s->in2out.fib_index);
+  if (!is_ha)
+    {
+      /* log NAT event */
+      snat_ipfix_logging_nat44_ses_delete (thread_index,
+                                          s->in2out.addr.as_u32,
+                                          s->out2in.addr.as_u32,
+                                          s->in2out.protocol,
+                                          s->in2out.port,
+                                          s->out2in.port,
+                                          s->in2out.fib_index);
+
+      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+                  s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+                  thread_index);
+    }
 
   /* Twice NAT address and port for external host */
   if (is_twice_nat_session (s))
@@ -337,7 +349,7 @@ nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr, u32 fib_index,
 
 snat_session_t *
 nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
-                             u32 thread_index)
+                             u32 thread_index, f64 now)
 {
   snat_session_t *s;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -368,7 +380,7 @@ nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
 
       /* Get the session */
       s = pool_elt_at_index (tsm->sessions, session_index);
-      nat_free_session_data (sm, s, thread_index);
+      nat_free_session_data (sm, s, thread_index, 0);
       if (snat_is_session_static (s))
        u->nstaticsessions--;
       else
@@ -405,6 +417,8 @@ nat_session_alloc_or_recycle (snat_main_t * sm, snat_user_t * u,
                               pool_elts (tsm->sessions));
     }
 
+  s->ha_last_refreshed = now;
+
   return s;
 }
 
@@ -431,7 +445,7 @@ nat_ed_session_alloc (snat_main_t * sm, snat_user_t * u, u32 thread_index,
     {
       clib_dlist_addtail (tsm->list_pool,
                          u->sessions_per_user_list_head_index, oldest_index);
-      nat_free_session_data (sm, s, thread_index);
+      nat_free_session_data (sm, s, thread_index, 0);
       if (snat_is_session_static (s))
        u->nstaticsessions--;
       else
@@ -482,6 +496,8 @@ nat_ed_session_alloc (snat_main_t * sm, snat_user_t * u, u32 thread_index,
                               pool_elts (tsm->sessions));
     }
 
+  s->ha_last_refreshed = now;
+
   return s;
 }
 
@@ -963,7 +979,7 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
                        continue;
 
                      nat_free_session_data (sm, s,
-                                            tsm - sm->per_thread_data);
+                                            tsm - sm->per_thread_data, 0);
                      nat44_delete_session (sm, s, tsm - sm->per_thread_data);
 
                      if (!addr_only && !sm->endpoint_dependent)
@@ -1087,7 +1103,7 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
                        continue;
 
                      nat_free_session_data (sm, s,
-                                            tsm - sm->per_thread_data);
+                                            tsm - sm->per_thread_data, 0);
                      nat44_delete_session (sm, s, tsm - sm->per_thread_data);
 
                      if (!addr_only && !sm->endpoint_dependent)
@@ -1396,7 +1412,7 @@ nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                           (clib_net_to_host_u16 (s->in2out.port) != local->port))
                         continue;
 
-                      nat_free_session_data (sm, s, tsm - sm->per_thread_data);
+                      nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
                       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
                     }
                 }
@@ -1550,7 +1566,7 @@ nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
                       match_local->port))
                    continue;
 
-                 nat_free_session_data (sm, s, tsm - sm->per_thread_data);
+                 nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
                  nat44_delete_session (sm, s, tsm - sm->per_thread_data);
                }
            }
@@ -1660,7 +1676,7 @@ snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
           pool_foreach (ses, tsm->sessions, ({
             if (ses->out2in.addr.as_u32 == addr.as_u32)
               {
-                nat_free_session_data (sm, ses, tsm - sm->per_thread_data);
+                nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
               }
           }));
@@ -2433,6 +2449,42 @@ snat_free_outside_address_and_port (snat_address_t * addresses,
     }
 }
 
+static int
+nat_set_outside_address_and_port (snat_address_t * addresses,
+                                 u32 thread_index, snat_session_key_t * k)
+{
+  snat_address_t *a = 0;
+  u32 address_index;
+  u16 port_host_byte_order = clib_net_to_host_u16 (k->port);
+
+  for (address_index = 0; address_index < vec_len (addresses);
+       address_index++)
+    {
+      if (addresses[address_index].addr.as_u32 != k->addr.as_u32)
+       continue;
+
+      a = addresses + address_index;
+      switch (k->protocol)
+       {
+#define _(N, j, n, s) \
+        case SNAT_PROTOCOL_##N: \
+          if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, port_host_byte_order)) \
+            return VNET_API_ERROR_INSTANCE_IN_USE; \
+          clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port_host_byte_order, 1); \
+          a->busy_##n##_ports_per_thread[thread_index]++; \
+          a->busy_##n##_ports++; \
+          return 0;
+         foreach_snat_protocol
+#undef _
+       default:
+         nat_log_info ("unknown protocol");
+         return 1;
+       }
+    }
+
+  return VNET_API_ERROR_NO_SUCH_ENTRY;
+}
+
 int
 snat_static_mapping_match (snat_main_t * sm,
                           snat_session_key_t match,
@@ -3107,6 +3159,334 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
   return next_worker_index;
 }
 
+void
+nat_ha_sadd_cb (ip4_address_t * in_addr, u16 in_port,
+               ip4_address_t * out_addr, u16 out_port,
+               ip4_address_t * eh_addr, u16 eh_port,
+               ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
+               u32 fib_index, u16 flags, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_session_key_t key;
+  snat_user_t *u;
+  snat_session_t *s;
+  clib_bihash_kv_8_8_t kv;
+  f64 now = vlib_time_now (sm->vlib_main);
+  nat_outside_fib_t *outside_fib;
+  fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+  snat_main_per_thread_data_t *tsm;
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = 32,
+    .fp_addr = {
+               .ip4.as_u32 = eh_addr->as_u32,
+               },
+  };
+
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.addr.as_u32 = out_addr->as_u32;
+  key.port = out_port;
+  key.protocol = proto;
+
+  if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
+    {
+      if (nat_set_outside_address_and_port
+         (sm->addresses, thread_index, &key))
+       return;
+    }
+
+  u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
+  if (!u)
+    return;
+
+  s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
+  if (!s)
+    return;
+
+  s->last_heard = now;
+  s->flags = flags;
+  s->ext_host_addr.as_u32 = eh_addr->as_u32;
+  s->ext_host_port = eh_port;
+  user_session_increment (sm, u, snat_is_session_static (s));
+  switch (vec_len (sm->outside_fibs))
+    {
+    case 0:
+      key.fib_index = sm->outside_fib_index;
+      break;
+    case 1:
+      key.fib_index = sm->outside_fibs[0].fib_index;
+      break;
+    default:
+      /* *INDENT-OFF* */
+      vec_foreach (outside_fib, sm->outside_fibs)
+        {
+          fei = fib_table_lookup (outside_fib->fib_index, &pfx);
+          if (FIB_NODE_INDEX_INVALID != fei)
+            {
+              if (fib_entry_get_resolving_interface (fei) != ~0)
+                {
+                  key.fib_index = outside_fib->fib_index;
+                  break;
+                }
+            }
+        }
+      /* *INDENT-ON* */
+      break;
+    }
+  s->out2in = key;
+  kv.key = key.as_u64;
+  kv.value = s - tsm->sessions;
+  if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
+    nat_log_warn ("out2in key add failed");
+
+  key.addr.as_u32 = in_addr->as_u32;
+  key.port = in_port;
+  key.fib_index = fib_index;
+  s->in2out = key;
+  kv.key = key.as_u64;
+  if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
+    nat_log_warn ("in2out key add failed");
+}
+
+void
+nat_ha_sdel_cb (ip4_address_t * out_addr, u16 out_port,
+               ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
+               u32 ti)
+{
+  snat_main_t *sm = &snat_main;
+  snat_session_key_t key;
+  clib_bihash_kv_8_8_t kv, value;
+  u32 thread_index;
+  snat_session_t *s;
+  snat_main_per_thread_data_t *tsm;
+
+  if (sm->num_workers > 1)
+    thread_index =
+      sm->first_worker_index +
+      (sm->workers[(clib_net_to_host_u16 (out_port) -
+                   1024) / sm->port_per_thread]);
+  else
+    thread_index = sm->num_workers;
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.addr.as_u32 = out_addr->as_u32;
+  key.port = out_port;
+  key.protocol = proto;
+  key.fib_index = fib_index;
+  kv.key = key.as_u64;
+  if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
+    return;
+
+  s = pool_elt_at_index (tsm->sessions, value.value);
+  nat_free_session_data (sm, s, thread_index, 1);
+  nat44_delete_session (sm, s, thread_index);
+}
+
+void
+nat_ha_sref_cb (ip4_address_t * out_addr, u16 out_port,
+               ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index,
+               u32 total_pkts, u64 total_bytes, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_session_key_t key;
+  clib_bihash_kv_8_8_t kv, value;
+  snat_session_t *s;
+  snat_main_per_thread_data_t *tsm;
+
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.addr.as_u32 = out_addr->as_u32;
+  key.port = out_port;
+  key.protocol = proto;
+  key.fib_index = fib_index;
+  kv.key = key.as_u64;
+  if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
+    return;
+
+  s = pool_elt_at_index (tsm->sessions, value.value);
+  s->total_pkts = total_pkts;
+  s->total_bytes = total_bytes;
+}
+
+void
+nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port,
+                  ip4_address_t * out_addr, u16 out_port,
+                  ip4_address_t * eh_addr, u16 eh_port,
+                  ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
+                  u32 fib_index, u16 flags, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_session_key_t key;
+  snat_user_t *u;
+  snat_session_t *s;
+  clib_bihash_kv_16_8_t kv;
+  f64 now = vlib_time_now (sm->vlib_main);
+  nat_outside_fib_t *outside_fib;
+  fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+  snat_main_per_thread_data_t *tsm;
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = 32,
+    .fp_addr = {
+               .ip4.as_u32 = eh_addr->as_u32,
+               },
+  };
+
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.addr.as_u32 = out_addr->as_u32;
+  key.port = out_port;
+  key.protocol = proto;
+
+  if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
+    {
+      if (nat_set_outside_address_and_port
+         (sm->addresses, thread_index, &key))
+       return;
+    }
+
+  key.addr.as_u32 = ehn_addr->as_u32;
+  key.port = ehn_port;
+  if (flags & SNAT_SESSION_FLAG_TWICE_NAT)
+    {
+      if (nat_set_outside_address_and_port
+         (sm->twice_nat_addresses, thread_index, &key))
+       return;
+    }
+
+  u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
+  if (!u)
+    return;
+
+  s = nat_ed_session_alloc (sm, u, thread_index, now);
+  if (!s)
+    return;
+
+  s->last_heard = now;
+  s->flags = flags;
+  s->ext_host_nat_addr.as_u32 = s->ext_host_addr.as_u32 = eh_addr->as_u32;
+  s->ext_host_nat_port = s->ext_host_port = eh_port;
+  if (is_twice_nat_session (s))
+    {
+      s->ext_host_nat_addr.as_u32 = ehn_addr->as_u32;
+      s->ext_host_nat_port = ehn_port;
+    }
+  user_session_increment (sm, u, snat_is_session_static (s));
+  switch (vec_len (sm->outside_fibs))
+    {
+    case 0:
+      key.fib_index = sm->outside_fib_index;
+      break;
+    case 1:
+      key.fib_index = sm->outside_fibs[0].fib_index;
+      break;
+    default:
+      /* *INDENT-OFF* */
+      vec_foreach (outside_fib, sm->outside_fibs)
+        {
+          fei = fib_table_lookup (outside_fib->fib_index, &pfx);
+          if (FIB_NODE_INDEX_INVALID != fei)
+            {
+              if (fib_entry_get_resolving_interface (fei) != ~0)
+                {
+                  key.fib_index = outside_fib->fib_index;
+                  break;
+                }
+            }
+        }
+      /* *INDENT-ON* */
+      break;
+    }
+  key.addr.as_u32 = out_addr->as_u32;
+  key.port = out_port;
+  s->out2in = key;
+  kv.value = s - tsm->sessions;
+
+  key.addr.as_u32 = in_addr->as_u32;
+  key.port = in_port;
+  key.fib_index = fib_index;
+  s->in2out = key;
+
+  make_ed_kv (&kv, in_addr, &s->ext_host_nat_addr,
+             snat_proto_to_ip_proto (proto), fib_index, in_port,
+             s->ext_host_nat_port);
+  if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
+    nat_log_warn ("in2out key add failed");
+
+  make_ed_kv (&kv, out_addr, eh_addr, snat_proto_to_ip_proto (proto),
+             s->out2in.fib_index, out_port, eh_port);
+  if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &kv, 1))
+    nat_log_warn ("out2in key add failed");
+}
+
+void
+nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port,
+                  ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+                  u32 fib_index, u32 ti)
+{
+  snat_main_t *sm = &snat_main;
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t kv, value;
+  u32 thread_index;
+  snat_session_t *s;
+  snat_main_per_thread_data_t *tsm;
+
+  if (sm->num_workers > 1)
+    thread_index =
+      sm->first_worker_index +
+      (sm->workers[(clib_net_to_host_u16 (out_port) -
+                   1024) / sm->port_per_thread]);
+  else
+    thread_index = sm->num_workers;
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.l_addr.as_u32 = out_addr->as_u32;
+  key.l_port = out_port;
+  key.r_addr.as_u32 = eh_addr->as_u32;
+  key.r_port = eh_port;
+  key.proto = proto;
+  key.fib_index = fib_index;
+  kv.key[0] = key.as_u64[0];
+  kv.key[1] = key.as_u64[1];
+  if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
+    return;
+
+  s = pool_elt_at_index (tsm->sessions, value.value);
+  nat_free_session_data (sm, s, thread_index, 1);
+  nat44_delete_session (sm, s, thread_index);
+}
+
+void
+nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port,
+                  ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+                  u32 fib_index, u32 total_pkts, u64 total_bytes,
+                  u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t kv, value;
+  snat_session_t *s;
+  snat_main_per_thread_data_t *tsm;
+
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  key.l_addr.as_u32 = out_addr->as_u32;
+  key.l_port = out_port;
+  key.r_addr.as_u32 = eh_addr->as_u32;
+  key.r_port = eh_port;
+  key.proto = proto;
+  key.fib_index = fib_index;
+  kv.key[0] = key.as_u64[0];
+  kv.key[1] = key.as_u64[1];
+  if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv, &value))
+    return;
+
+  s = pool_elt_at_index (tsm->sessions, value.value);
+  s->total_pkts = total_pkts;
+  s->total_bytes = total_bytes;
+}
+
 static clib_error_t *
 snat_config (vlib_main_t * vm, unformat_input_t * input)
 {
@@ -3244,6 +3624,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
          sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
          sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
          nat_affinity_init (vm);
+         nat_ha_init (vm, nat_ha_sadd_ed_cb, nat_ha_sdel_ed_cb,
+                      nat_ha_sref_ed_cb);
        }
       else
        {
@@ -3254,6 +3636,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
          sm->out2in_node_index = snat_out2in_node.index;
          sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
          sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
+         nat_ha_init (vm, nat_ha_sadd_cb, nat_ha_sdel_cb, nat_ha_sref_cb);
        }
       if (!static_mapping_only ||
          (static_mapping_only && static_mapping_connection_tracking))
@@ -3574,7 +3957,7 @@ nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
        return VNET_API_ERROR_UNSPECIFIED;
 
       s = pool_elt_at_index (tsm->sessions, value.value);
-      nat_free_session_data (sm, s, tsm - sm->per_thread_data);
+      nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
       nat44_delete_session (sm, s, tsm - sm->per_thread_data);
       return 0;
     }
@@ -3621,7 +4004,7 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
   if (pool_is_free_index (tsm->sessions, value.value))
     return VNET_API_ERROR_UNSPECIFIED;
   s = pool_elt_at_index (tsm->sessions, value.value);
-  nat_free_session_data (sm, s, tsm - sm->per_thread_data);
+  nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
   nat44_delete_session (sm, s, tsm - sm->per_thread_data);
   return 0;
 }
index c9139b3..dabb616 100644 (file)
@@ -206,6 +206,9 @@ typedef CLIB_PACKED(struct
   /* Last heard timer */
   f64 last_heard;
 
+  /* Last HA refresh */
+  f64 ha_last_refreshed;
+
   /* Counters */
   u64 total_bytes;
   u32 total_pkts;
@@ -971,9 +974,10 @@ int nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
  *
  * @param s            NAT session
  * @param thread_index thread index
+ * @param is_ha        is HA event
  */
 void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
-                           u32 thread_index);
+                           u32 thread_index, u8 is_ha);
 
 /**
  * @brief Find or create NAT user
@@ -997,7 +1001,7 @@ snat_user_t *nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr,
  */
 snat_session_t *nat_session_alloc_or_recycle (snat_main_t * sm,
                                              snat_user_t * u,
-                                             u32 thread_index);
+                                             u32 thread_index, f64 now);
 
 /**
  * @brief Allocate NAT endpoint-dependent session
@@ -1102,7 +1106,6 @@ int snat_static_mapping_match (snat_main_t * sm,
 void snat_add_del_addr_to_fib (ip4_address_t * addr,
                               u8 p_len, u32 sw_if_index, int is_add);
 
-
 /*
  * Why is this here? Because we don't need to touch this layer to
  * simply reply to an icmp. We need to change id to a unique
index eba5d57..54a90fc 100644 (file)
@@ -24,6 +24,7 @@
 #include <nat/nat_inlines.h>
 #include <nat/nat_affinity.h>
 #include <vnet/fib/fib_table.h>
+#include <nat/nat_ha.h>
 
 #define UNSUPPORTED_IN_DET_MODE_STR \
   "This command is unsupported in deterministic mode"
@@ -347,6 +348,143 @@ nat_show_mss_clamping_command_fn (vlib_main_t * vm, unformat_input_t * input,
   return 0;
 }
 
+/*
+ * CLI handler for "nat ha failover".
+ *
+ * Parses "<ip4-address>:<port> [refresh-intervval <sec>]" and hands the
+ * result to nat_ha_set_failover().  The refresh interval defaults to 10
+ * when not given.  The address/port pair is mandatory: without it the
+ * handler reports an error instead of passing uninitialized stack data
+ * on, and a port that does not fit in 16 bits is rejected rather than
+ * silently truncated.
+ *
+ * Returns 0 on success (or when no input line is present), otherwise a
+ * clib error describing the problem.
+ */
+static clib_error_t *
+nat_ha_failover_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  ip4_address_t addr;
+  u32 port = 0, session_refresh_interval = 10;
+  u8 addr_set = 0;
+  int rv;
+  clib_error_t *error = 0;
+
+  addr.as_u32 = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U:%u", unformat_ip4_address, &addr, &port))
+       addr_set = 1;
+      else
+       if (unformat
+           (line_input, "refresh-intervval %u", &session_refresh_interval))
+       ;
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
+    }
+
+  /* the remote endpoint is not optional */
+  if (!addr_set)
+    {
+      error = clib_error_return (0, "missing <ip4-address>:<port>");
+      goto done;
+    }
+
+  /* port is parsed as u32 but passed on as u16 */
+  if (port > 0xffff)
+    {
+      error = clib_error_return (0, "invalid port %u", port);
+      goto done;
+    }
+
+  rv = nat_ha_set_failover (&addr, (u16) port, session_refresh_interval);
+  if (rv)
+    error = clib_error_return (0, "set HA failover failed");
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*
+ * CLI handler for "nat ha listener".
+ *
+ * Parses "<ip4-address>:<port> [path-mtu <path-mtu>]" and hands the
+ * result to nat_ha_set_listener().  The path MTU defaults to 512 when
+ * not given.  The address/port pair is mandatory: without it the
+ * handler reports an error instead of passing uninitialized stack data
+ * on, and a port that does not fit in 16 bits is rejected rather than
+ * silently truncated.
+ *
+ * Returns 0 on success (or when no input line is present), otherwise a
+ * clib error describing the problem.
+ */
+static clib_error_t *
+nat_ha_listener_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  ip4_address_t addr;
+  u32 port = 0, path_mtu = 512;
+  u8 addr_set = 0;
+  int rv;
+  clib_error_t *error = 0;
+
+  addr.as_u32 = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U:%u", unformat_ip4_address, &addr, &port))
+       addr_set = 1;
+      else if (unformat (line_input, "path-mtu %u", &path_mtu))
+       ;
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
+    }
+
+  /* the local endpoint is not optional */
+  if (!addr_set)
+    {
+      error = clib_error_return (0, "missing <ip4-address>:<port>");
+      goto done;
+    }
+
+  /* port is parsed as u32 but passed on as u16 */
+  if (port > 0xffff)
+    {
+      error = clib_error_return (0, "invalid port %u", port);
+      goto done;
+    }
+
+  rv = nat_ha_set_listener (&addr, (u16) port, path_mtu);
+  if (rv)
+    error = clib_error_return (0, "set HA listener failed");
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*
+ * CLI handler for "show nat ha".
+ *
+ * Prints a single "disabled" line when no listener port is configured;
+ * otherwise prints the listener settings, the failover settings (or
+ * "NA" when no failover port is set) and the resync status.
+ */
+static clib_error_t *
+nat_show_ha_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  ip4_address_t endpoint;
+  u16 udp_port;
+  u32 mtu, refresh_sec, missed_acks;
+  u8 resync_running;
+
+  nat_ha_get_listener (&endpoint, &udp_port, &mtu);
+  if (udp_port == 0)
+    {
+      vlib_cli_output (vm, "NAT HA disabled\n");
+      return 0;
+    }
+
+  vlib_cli_output (vm, "LISTENER:\n");
+  vlib_cli_output (vm, "  %U:%u path-mtu %u\n",
+                  format_ip4_address, &endpoint, udp_port, mtu);
+
+  /* endpoint/udp_port are reused for the failover peer from here on */
+  nat_ha_get_failover (&endpoint, &udp_port, &refresh_sec);
+  vlib_cli_output (vm, "FAILOVER:\n");
+  if (udp_port == 0)
+    vlib_cli_output (vm, "  NA\n");
+  else
+    vlib_cli_output (vm, "  %U:%u refresh-intervval %usec\n",
+                    format_ip4_address, &endpoint, udp_port, refresh_sec);
+
+  nat_ha_get_resync_status (&resync_running, &missed_acks);
+  vlib_cli_output (vm, "RESYNC:\n");
+  if (!resync_running)
+    vlib_cli_output (vm, "  completed (%d ACK missed)\n", missed_acks);
+  else
+    vlib_cli_output (vm, "  in progress\n");
+
+  return 0;
+}
+
+/*
+ * CLI handler for "nat ha flush": calls nat_ha_flush (0) and always
+ * reports success.  Takes no arguments from the input line.
+ */
+static clib_error_t *
+nat_ha_flush_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  nat_ha_flush (0);
+  return 0;
+}
+
+/*
+ * CLI handler for "nat ha resync".
+ *
+ * Starts a resync without a client index, pid or completion callback.
+ * A non-zero result from nat_ha_resync() is reported to the user as
+ * "already running".
+ */
+static clib_error_t *
+nat_ha_resync_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  if (nat_ha_resync (0, 0, 0))
+    return clib_error_return (0, "NAT HA resync already running");
+
+  return 0;
+}
+
 static clib_error_t *
 add_address_command_fn (vlib_main_t * vm,
                        unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1858,6 +1996,7 @@ VLIB_CLI_COMMAND (nat44_show_alloc_addr_and_port_alg_command, static) = {
  *  vpp# nat mss-clamping 1452
  * To disbale TCP MSS rewriting use:
  *  vpp# nat mss-clamping disable
+ * @cliexend
 ?*/
 VLIB_CLI_COMMAND (nat_set_mss_clamping_command, static) = {
     .path = "nat mss-clamping",
@@ -1867,8 +2006,9 @@ VLIB_CLI_COMMAND (nat_set_mss_clamping_command, static) = {
 
 /*?
  * @cliexpar
- * @cliexstart{nat mss-clamping}
+ * @cliexstart{show nat mss-clamping}
  * Show TCP MSS rewriting configuration
+ * @cliexend
 ?*/
 VLIB_CLI_COMMAND (nat_show_mss_clamping_command, static) = {
     .path = "show nat mss-clamping",
@@ -1876,6 +2016,66 @@ VLIB_CLI_COMMAND (nat_show_mss_clamping_command, static) = {
     .function = nat_show_mss_clamping_command_fn,
 };
 
+/*?
+ * @cliexpar
+ * @cliexstart{nat ha failover}
+ * Set HA failover (remote settings)
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat_ha_failover_command, static) = {
+    .path = "nat ha failover",
+    /* keyword spelling below must match the one parsed by
+     * nat_ha_failover_command_fn ("refresh-intervval") */
+    .short_help = "nat ha failover <ip4-address>:<port> [refresh-intervval <sec>]",
+    .function = nat_ha_failover_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat ha listener}
+ * Set HA listener (local settings)
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat_ha_listener_command, static) = {
+    .path = "nat ha listener",
+    .short_help = "nat ha listener <ip4-address>:<port> [path-mtu <path-mtu>]",
+    .function = nat_ha_listener_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat ha}
+ * Show HA configuration/status
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat_show_ha_command, static) = {
+    .path = "show nat ha",
+    .short_help = "show nat ha",
+    .function = nat_show_ha_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat ha flush}
+ * Flush the current HA data (for testing)
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat_ha_flush_command, static) = {
+    .path = "nat ha flush",
+    .short_help = "nat ha flush",
+    .function = nat_ha_flush_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat ha resync}
+ * Resync HA (resend existing sessions to new failover)
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat_ha_resync_command, static) = {
+    .path = "nat ha resync",
+    .short_help = "nat ha resync",
+    .function = nat_ha_resync_command_fn,
+};
+
 /*?
  * @cliexpar
  * @cliexstart{show nat44 hash tables}
index 865f772..516b27f 100644 (file)
@@ -25,6 +25,7 @@
 #include <nat/dslite.h>
 #include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
+#include <nat/nat_ha.h>
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
 
@@ -630,6 +631,203 @@ vl_api_nat_get_mss_clamping_t_print (vl_api_nat_get_mss_clamping_t * mp,
   FINISH;
 }
 
+/*
+ * API handler for nat_ha_set_listener.
+ *
+ * Copies the IPv4 address out of the request, converts port and
+ * path_mtu from network byte order and calls nat_ha_set_listener().
+ *
+ * NOTE(review): sm, rmp and rv are not referenced directly after this
+ * point; presumably REPLY_MACRO expands to code that uses them (and
+ * mp) by name - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_set_listener_t_handler (vl_api_nat_ha_set_listener_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_set_listener_reply_t *rmp;
+  ip4_address_t addr;
+  int rv;
+
+  memcpy (&addr, &mp->ip_address, sizeof (addr));
+  rv =
+    nat_ha_set_listener (&addr, clib_net_to_host_u16 (mp->port),
+                        clib_net_to_host_u32 (mp->path_mtu));
+
+  REPLY_MACRO (VL_API_NAT_HA_SET_LISTENER_REPLY);
+}
+
+/*
+ * Formats a nat_ha_set_listener request as a "SCRIPT:" line containing
+ * the address, port and path MTU (port/path_mtu converted from network
+ * byte order).
+ *
+ * NOTE(review): presumably FINISH consumes s and handle and produces
+ * the return value - confirm against the macro definition.
+ */
+static void *
+vl_api_nat_ha_set_listener_t_print (vl_api_nat_ha_set_listener_t * mp,
+                                   void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_set_listener ");
+  s = format (s, "ip_address %U ", format_ip4_address, mp->ip_address);
+  s = format (s, "port %d ", clib_net_to_host_u16 (mp->port));
+  s = format (s, "path_mtu %d", clib_net_to_host_u32 (mp->path_mtu));
+
+  FINISH;
+}
+
+/*
+ * API handler for nat_ha_get_listener.
+ *
+ * Reads the current listener settings via nat_ha_get_listener() and
+ * fills the reply with the address plus port and path_mtu converted to
+ * network byte order.  rv is always 0.
+ *
+ * NOTE(review): sm, rmp, rv and mp are presumably referenced by name
+ * inside REPLY_MACRO2 - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_get_listener_t_handler (vl_api_nat_ha_get_listener_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_get_listener_reply_t *rmp;
+  int rv = 0;
+  ip4_address_t addr;
+  u16 port;
+  u32 path_mtu;
+
+  nat_ha_get_listener (&addr, &port, &path_mtu);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_NAT_HA_GET_LISTENER_REPLY,
+  ({
+    clib_memcpy (rmp->ip_address, &addr, sizeof (ip4_address_t));
+    rmp->port = clib_host_to_net_u16 (port);
+    rmp->path_mtu = clib_host_to_net_u32 (path_mtu);
+  }))
+  /* *INDENT-ON* */
+}
+
+/*
+ * Formats a nat_ha_get_listener request as a "SCRIPT:" line; no request
+ * fields are printed.
+ */
+static void *
+vl_api_nat_ha_get_listener_t_print (vl_api_nat_ha_get_listener_t * mp,
+                                   void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_get_listener");
+
+  FINISH;
+}
+
+/*
+ * API handler for nat_ha_set_failover.
+ *
+ * Copies the IPv4 address out of the request, converts port and
+ * session_refresh_interval from network byte order and calls
+ * nat_ha_set_failover().
+ *
+ * NOTE(review): sm, rmp and rv are not referenced directly after this
+ * point; presumably REPLY_MACRO expands to code that uses them (and
+ * mp) by name - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_set_failover_t_handler (vl_api_nat_ha_set_failover_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_set_failover_reply_t *rmp;
+  ip4_address_t addr;
+  int rv;
+
+  memcpy (&addr, &mp->ip_address, sizeof (addr));
+  rv =
+    nat_ha_set_failover (&addr, clib_net_to_host_u16 (mp->port),
+                        clib_net_to_host_u32 (mp->session_refresh_interval));
+
+  REPLY_MACRO (VL_API_NAT_HA_SET_FAILOVER_REPLY);
+}
+
+/*
+ * Formats a nat_ha_set_failover request as a "SCRIPT:" line containing
+ * every request field: address, port and session refresh interval
+ * (port and interval converted from network byte order).
+ *
+ * NOTE(review): presumably FINISH consumes s and handle and produces
+ * the return value - confirm against the macro definition.
+ */
+static void *
+vl_api_nat_ha_set_failover_t_print (vl_api_nat_ha_set_failover_t * mp,
+                                   void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_set_failover ");
+  s = format (s, "ip_address %U ", format_ip4_address, mp->ip_address);
+  s = format (s, "port %d ", clib_net_to_host_u16 (mp->port));
+  s = format (s, "session_refresh_interval %d",
+             clib_net_to_host_u32 (mp->session_refresh_interval));
+
+  FINISH;
+}
+
+/*
+ * API handler for nat_ha_get_failover.
+ *
+ * Reads the current failover settings via nat_ha_get_failover() and
+ * fills the reply with the address plus port and
+ * session_refresh_interval converted to network byte order.  rv is
+ * always 0.
+ *
+ * NOTE(review): sm, rmp, rv and mp are presumably referenced by name
+ * inside REPLY_MACRO2 - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_get_failover_t_handler (vl_api_nat_ha_get_failover_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_get_failover_reply_t *rmp;
+  int rv = 0;
+  ip4_address_t addr;
+  u16 port;
+  u32 session_refresh_interval;
+
+  nat_ha_get_failover (&addr, &port, &session_refresh_interval);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_NAT_HA_GET_FAILOVER_REPLY,
+  ({
+    clib_memcpy (rmp->ip_address, &addr, sizeof (ip4_address_t));
+    rmp->port = clib_host_to_net_u16 (port);
+    rmp->session_refresh_interval = clib_host_to_net_u32 (session_refresh_interval);
+  }))
+  /* *INDENT-ON* */
+}
+
+/*
+ * Formats a nat_ha_get_failover request as a "SCRIPT:" line; no request
+ * fields are printed.
+ */
+static void *
+vl_api_nat_ha_get_failover_t_print (vl_api_nat_ha_get_failover_t * mp,
+                                   void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_get_failover");
+
+  FINISH;
+}
+
+/*
+ * API handler for nat_ha_flush: calls nat_ha_flush (0) and replies with
+ * rv = 0.
+ *
+ * NOTE(review): sm, rmp, rv and mp are presumably referenced by name
+ * inside REPLY_MACRO - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_flush_t_handler (vl_api_nat_ha_flush_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_flush_reply_t *rmp;
+  int rv = 0;
+
+  nat_ha_flush (0);
+
+  REPLY_MACRO (VL_API_NAT_HA_FLUSH_REPLY);
+}
+
+/*
+ * Formats a nat_ha_flush request as a "SCRIPT:" line; no request fields
+ * are printed.
+ */
+static void *
+vl_api_nat_ha_flush_t_print (vl_api_nat_ha_flush_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_flush ");
+
+  FINISH;
+}
+
+/*
+ * Resync-completed notification towards an API client.
+ *
+ * Resolves the client registration from client_index; when the client
+ * is gone nothing is sent.  Otherwise a zeroed
+ * nat_ha_resync_completed_event message is allocated, filled with the
+ * client index, the pid (passed through unchanged) and the missed
+ * count (converted to network byte order), and sent to the client.
+ */
+static void
+nat_ha_resync_completed_event_cb (u32 client_index, u32 pid, u32 missed_count)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_resync_completed_event_t *event;
+  vl_api_registration_t *client =
+    vl_api_client_index_to_registration (client_index);
+
+  if (client == 0)
+    return;
+
+  event = vl_msg_api_alloc (sizeof (*event));
+  clib_memset (event, 0, sizeof (*event));
+  event->_vl_msg_id =
+    ntohs (VL_API_NAT_HA_RESYNC_COMPLETED_EVENT + sm->msg_id_base);
+  event->client_index = client_index;
+  event->pid = pid;
+  event->missed_count = clib_host_to_net_u32 (missed_count);
+
+  vl_api_send_msg (client, (u8 *) event);
+}
+
+/*
+ * API handler for nat_ha_resync.
+ *
+ * Starts a resync on behalf of the requesting client.  client_index and
+ * pid are passed through as received; the completion callback is only
+ * installed when the request sets want_resync_event.
+ *
+ * NOTE(review): sm, rmp and rv are presumably referenced by name inside
+ * REPLY_MACRO - confirm before renaming any local.
+ */
+static void
+vl_api_nat_ha_resync_t_handler (vl_api_nat_ha_resync_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  vl_api_nat_ha_resync_reply_t *rmp;
+  int rv;
+
+  rv =
+    nat_ha_resync (mp->client_index, mp->pid,
+                  mp->want_resync_event ? nat_ha_resync_completed_event_cb :
+                  NULL);
+
+  REPLY_MACRO (VL_API_NAT_HA_RESYNC_REPLY);
+}
+
+/*
+ * Formats a nat_ha_resync request as a "SCRIPT:" line containing
+ * want_resync_event and the byte-swapped pid.
+ *
+ * NOTE(review): the handler passes mp->pid on without any byte-order
+ * conversion while this printer swaps it - confirm which is intended.
+ */
+static void *
+vl_api_nat_ha_resync_t_print (vl_api_nat_ha_resync_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat_ha_resync ");
+  s =
+    format (s, "want_resync_event %d pid %d", mp->want_resync_event,
+           clib_host_to_net_u32 (mp->pid));
+
+  FINISH;
+}
+
 /*************/
 /*** NAT44 ***/
 /*************/
@@ -1846,7 +2044,7 @@ static void
         vec_foreach (ses_index, ses_to_be_removed)
         {
           s = pool_elt_at_index(tsm->sessions, ses_index[0]);
-          nat_free_session_data (sm, s, tsm - sm->per_thread_data);
+          nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
           nat44_delete_session (sm, s, tsm - sm->per_thread_data);
         }
         vec_free (ses_to_be_removed);
@@ -3198,6 +3396,12 @@ _(NAT_SET_ADDR_AND_PORT_ALLOC_ALG, nat_set_addr_and_port_alloc_alg)     \
 _(NAT_GET_ADDR_AND_PORT_ALLOC_ALG, nat_get_addr_and_port_alloc_alg)     \
 _(NAT_SET_MSS_CLAMPING, nat_set_mss_clamping)                           \
 _(NAT_GET_MSS_CLAMPING, nat_get_mss_clamping)                           \
+_(NAT_HA_SET_LISTENER, nat_ha_set_listener)                             \
+_(NAT_HA_SET_FAILOVER, nat_ha_set_failover)                             \
+_(NAT_HA_GET_LISTENER, nat_ha_get_listener)                             \
+_(NAT_HA_GET_FAILOVER, nat_ha_get_failover)                             \
+_(NAT_HA_FLUSH, nat_ha_flush)                                           \
+_(NAT_HA_RESYNC, nat_ha_resync)                                         \
 _(NAT44_ADD_DEL_ADDRESS_RANGE, nat44_add_del_address_range)             \
 _(NAT44_INTERFACE_ADD_DEL_FEATURE, nat44_interface_add_del_feature)     \
 _(NAT44_ADD_DEL_STATIC_MAPPING, nat44_add_del_static_mapping)           \
diff --git a/src/plugins/nat/nat_ha.c b/src/plugins/nat/nat_ha.c
new file mode 100644 (file)
index 0000000..a7eaf6e
--- /dev/null
@@ -0,0 +1,1179 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nat_ha.h"
+#include <vnet/udp/udp.h>
+#include <nat/nat.h>
+#include <vppinfra/atomics.h>
+
+/* number of retries; a cached packet whose retry_count has reached this
+ * value is dropped as missed by nat_ha_resend_scan() */
+#define NAT_HA_RETRIES 3
+
+/* counter list: _(enum suffix, counter name, index into counters[]) */
+#define foreach_nat_ha_counter           \
+_(RECV_ADD, "add-event-recv", 0)         \
+_(RECV_DEL, "del-event-recv", 1)         \
+_(RECV_REFRESH, "refresh-event-recv", 2) \
+_(SEND_ADD, "add-event-send", 3)         \
+_(SEND_DEL, "del-event-send", 4)         \
+_(SEND_REFRESH, "refresh-event-send", 5) \
+_(RECV_ACK, "ack-recv", 6)               \
+_(SEND_ACK, "ack-send", 7)               \
+_(RETRY_COUNT, "retry-count", 8)         \
+_(MISSED_COUNT, "missed-count", 9)
+
+/* NAT HA protocol version */
+#define NAT_HA_VERSION 0x01
+
+/* NAT HA protocol flags */
+#define NAT_HA_FLAG_ACK 0x01
+
+/* NAT HA event types */
+typedef enum
+{
+  NAT_HA_ADD = 1,
+  NAT_HA_DEL,
+  NAT_HA_REFRESH,
+} nat_ha_event_type_t;
+
+/* NAT HA protocol header; follows the IPv4 and UDP headers in a state
+ * sync packet.  count, sequence_number and thread_index are stored in
+ * network byte order by nat_ha_header_create() / nat_ha_send(). */
+typedef struct
+{
+  /* version */
+  u8 version;
+  /* flags */
+  u8 flags;
+  /* event count */
+  u16 count;
+  /* sequence number */
+  u32 sequence_number;
+  /* thread index where events originated */
+  u32 thread_index;
+} __attribute__ ((packed)) nat_ha_message_header_t;
+
+/* NAT HA protocol event data.  The receive path converts flags,
+ * fib_index, total_pkts and total_bytes from network byte order;
+ * addresses and ports are handed to the session callbacks exactly as
+ * they appear in the event. */
+typedef struct
+{
+  /* event type */
+  u8 event_type;
+  /* session data */
+  u8 protocol;
+  u16 flags;
+  u32 in_addr;
+  u32 out_addr;
+  u16 in_port;
+  u16 out_port;
+  u32 eh_addr;
+  u32 ehn_addr;
+  u16 eh_port;
+  u16 ehn_port;
+  u32 fib_index;
+  u32 total_pkts;
+  u64 total_bytes;
+} __attribute__ ((packed)) nat_ha_event_t;
+
+/* counter indices generated from foreach_nat_ha_counter */
+typedef enum
+{
+#define _(N, s, v) NAT_HA_COUNTER_##N = v,
+  foreach_nat_ha_counter
+#undef _
+  NAT_HA_N_COUNTERS
+} nat_ha_counter_t;
+
+/* data waiting for ACK */
+typedef struct
+{
+  /* sequence number, kept as written into the packet header (the log
+   * messages convert it with clib_net_to_host_u32) */
+  u32 seq;
+  /* retry count */
+  u32 retry_count;
+  /* next retry time (set to now + 2.0 on queueing and on each resend) */
+  f64 retry_timer;
+  /* 1 if HA resync */
+  u8 is_resync;
+  /* packet data: vector copy of the packet starting at the IP header */
+  u8 *data;
+} nat_ha_resend_entry_t;
+
+/* per thread data */
+typedef struct
+{
+  /* buffer under construction */
+  vlib_buffer_t *state_sync_buffer;
+  /* frame containing NAT HA buffers */
+  vlib_frame_t *state_sync_frame;
+  /* number of events */
+  u16 state_sync_count;
+  /* next event offset */
+  u32 state_sync_next_event_offset;
+  /* data waiting for ACK */
+  nat_ha_resend_entry_t *resend_queue;
+} nat_ha_per_thread_data_t;
+
+/* NAT HA settings */
+typedef struct nat_ha_main_s
+{
+  /* local IP address and UDP port (src_port 0 means no listener) */
+  ip4_address_t src_ip_address;
+  u16 src_port;
+  /* failover IP address and UDP port */
+  ip4_address_t dst_ip_address;
+  u16 dst_port;
+  /* path MTU between local and failover */
+  u32 state_sync_path_mtu;
+  /* number of seconds after which to send session counters refresh */
+  u32 session_refresh_interval;
+  /* counters */
+  vlib_simple_counter_main_t counters[NAT_HA_N_COUNTERS];
+  vlib_main_t *vlib_main;
+  /* sequence number counter */
+  u32 sequence_number;
+  /* 1 if resync in progress */
+  u8 in_resync;
+  /* number of remaining ACK for resync */
+  u32 resync_ack_count;
+  /* number of missed ACK for resync */
+  u32 resync_ack_missed;
+  /* resync data */
+  nat_ha_resync_event_cb_t event_callback;
+  u32 client_index;
+  u32 pid;
+  /* call back functions for received HA events on failover */
+  nat_ha_sadd_cb_t sadd_cb;
+  nat_ha_sdel_cb_t sdel_cb;
+  nat_ha_sref_cb_t sref_cb;
+  /* per thread data */
+  u32 num_workers;
+  nat_ha_per_thread_data_t *per_thread_data;
+  /* worker handoff frame-queue index (~0 until initialized) */
+  u32 fq_index;
+} nat_ha_main_t;
+
+/* single global instance of the HA state */
+nat_ha_main_t nat_ha_main;
+/* graph node registrations referenced before their definitions */
+vlib_node_registration_t nat_ha_process_node;
+vlib_node_registration_t nat_ha_worker_node;
+vlib_node_registration_t nat_ha_node;
+vlib_node_registration_t nat_ha_handoff_node;
+
+/*
+ * Finish a resync once no resync packet is waiting for an ACK any more.
+ *
+ * Does nothing while resync_ack_count is non-zero.  Otherwise clears
+ * in_resync, logs the outcome (FAILED when at least one ACK was
+ * missed) and, if a completion callback was registered, invokes it
+ * with the stored client index, pid and missed-ACK count.
+ */
+static void
+nat_ha_resync_fin (void)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+
+  /* if no more resync ACK remaining we are done */
+  if (ha->resync_ack_count)
+    return;
+
+  ha->in_resync = 0;
+  nat_log_info ("resync completed with result %s",
+               ha->resync_ack_missed ? "FAILED" : "SUCCESS");
+  if (ha->event_callback)
+    ha->event_callback (ha->client_index, ha->pid, ha->resync_ack_missed);
+}
+
+/*
+ * Cache HA NAT data waiting for ACK.
+ *
+ * Appends a new entry to the resend queue of the given thread holding a
+ * private copy of data_len bytes of packet data, the sequence number
+ * (stored exactly as passed in) and the resync marker.  The first retry
+ * is scheduled 2 seconds from now.
+ *
+ * data_len is a u32 so that packets longer than 255 bytes (the caller
+ * passes the full buffer length) are cached completely instead of being
+ * truncated.
+ *
+ * Always returns 0.
+ */
+static int
+nat_ha_resend_queue_add (u32 seq, u8 * data, u32 data_len, u8 is_resync,
+                        u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
+  nat_ha_resend_entry_t *entry;
+  f64 now = vlib_time_now (ha->vlib_main);
+
+  vec_add2 (td->resend_queue, entry, 1);
+  /* zeroing also makes entry->data an empty vector for vec_add below */
+  clib_memset (entry, 0, sizeof (*entry));
+  entry->retry_timer = now + 2.0;
+  entry->seq = seq;
+  entry->is_resync = is_resync;
+  vec_add (entry->data, data, data_len);
+
+  return 0;
+}
+
+/*
+ * Handle a received ACK.
+ *
+ * Searches the resend queue of the given thread for the first entry
+ * with a matching sequence number.  On a match the ACK counter is
+ * bumped, resync bookkeeping is updated for resync packets, and the
+ * cached packet is released and removed from the queue.  An ACK for an
+ * unknown sequence number is ignored.
+ */
+static_always_inline void
+nat_ha_ack_recv (u32 seq, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
+  nat_ha_resend_entry_t *pending;
+  u32 idx;
+
+  vec_foreach_index (idx, td->resend_queue)
+  {
+    pending = &td->resend_queue[idx];
+    if (pending->seq == seq)
+      {
+       vlib_increment_simple_counter (&ha->counters
+                                      [NAT_HA_COUNTER_RECV_ACK],
+                                      thread_index, 0, 1);
+       /* one resync packet less to wait for */
+       if (pending->is_resync)
+         {
+           clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
+           nat_ha_resync_fin ();
+         }
+       /* ACK received remove cached data */
+       vec_free (pending->data);
+       vec_del1 (td->resend_queue, idx);
+       nat_log_debug ("ACK for seq %d received",
+                      clib_net_to_host_u32 (seq));
+       return;
+      }
+  }
+}
+
+/*
+ * Scan non-ACKed HA NAT data for retry.
+ *
+ * For every cached packet of the given thread whose retry timer has
+ * expired:
+ *  - if NAT_HA_RETRIES resends were already made, the packet is counted
+ *    as missed (also in the resync bookkeeping for resync packets) and
+ *    marked for removal;
+ *  - otherwise a copy of the cached packet is sent to ip4-lookup in a
+ *    fresh buffer and the retry timer is pushed 2 seconds ahead.
+ *
+ * Marked entries are removed after the scan.  Removal walks the marked
+ * indices from highest to lowest because vec_del1 fills the hole with
+ * the last element; going in ascending order would invalidate the
+ * indices still to be processed.
+ *
+ * If no buffer can be allocated the scan stops early, but entries
+ * already marked for removal are still removed and the scratch vector
+ * is released.
+ */
+static void
+nat_ha_resend_scan (f64 now, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
+  u32 i, n, *to_delete = 0;
+  vlib_main_t *vm = ha->vlib_main;
+  vlib_buffer_t *b = 0;
+  vlib_frame_t *f;
+  u32 bi, *to_next;
+  ip4_header_t *ip;
+
+  vec_foreach_index (i, td->resend_queue)
+  {
+    if (td->resend_queue[i].retry_timer > now)
+      continue;
+
+    /* maximum retry reached delete cached data */
+    if (td->resend_queue[i].retry_count >= NAT_HA_RETRIES)
+      {
+       nat_log_notice ("seq %d missed",
+                       clib_net_to_host_u32 (td->resend_queue[i].seq));
+       if (td->resend_queue[i].is_resync)
+         {
+           clib_atomic_fetch_add (&ha->resync_ack_missed, 1);
+           clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
+           nat_ha_resync_fin ();
+         }
+       /* indices are collected in ascending order */
+       vec_add1 (to_delete, i);
+       vlib_increment_simple_counter (&ha->counters
+                                      [NAT_HA_COUNTER_MISSED_COUNT],
+                                      thread_index, 0, 1);
+       continue;
+      }
+
+    /* retry to send non-ACKed data */
+    nat_log_debug ("state sync seq %d resend",
+                  clib_net_to_host_u32 (td->resend_queue[i].seq));
+    td->resend_queue[i].retry_count++;
+    vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RETRY_COUNT],
+                                  thread_index, 0, 1);
+    if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+      {
+       nat_log_warn ("HA NAT state sync can't allocate buffer");
+       /* stop scanning but still run the cleanup below */
+       break;
+      }
+    b = vlib_get_buffer (vm, bi);
+    b->current_length = vec_len (td->resend_queue[i].data);
+    b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+    b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+    vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+    ip = vlib_buffer_get_current (b);
+    clib_memcpy (ip, td->resend_queue[i].data,
+                vec_len (td->resend_queue[i].data));
+    f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+    to_next = vlib_frame_vector_args (f);
+    to_next[0] = bi;
+    f->n_vectors = 1;
+    vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+    td->resend_queue[i].retry_timer = now + 2.0;
+  }
+
+  /* highest index first: vec_del1 moves the last element into the hole,
+   * which must never be an element that is still to be removed */
+  for (n = vec_len (to_delete); n > 0; n--)
+    {
+      i = to_delete[n - 1];
+      vec_free (td->resend_queue[i].data);
+      vec_del1 (td->resend_queue, i);
+    }
+  vec_free (to_delete);
+}
+
+/*
+ * Initialize the NAT HA state.
+ *
+ * Clears listener/failover endpoints and resync bookkeeping, stores the
+ * vlib main and the three session callbacks, sizes the per-thread data
+ * vector to the number of vlib mains, marks the handoff frame queue as
+ * not yet created, reads the worker count from the "workers" thread
+ * registration (0 when there is none) and sets up one simple counter
+ * per entry of foreach_nat_ha_counter.
+ */
+void
+nat_ha_init (vlib_main_t * vm, nat_ha_sadd_cb_t sadd_cb,
+            nat_ha_sdel_cb_t sdel_cb, nat_ha_sref_cb_t sref_cb)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_thread_registration_t *tr;
+  uword *p;
+
+  /* session callbacks and owning vlib main */
+  ha->vlib_main = vm;
+  ha->sadd_cb = sadd_cb;
+  ha->sdel_cb = sdel_cb;
+  ha->sref_cb = sref_cb;
+
+  /* no listener, no failover peer */
+  ha->src_ip_address.as_u32 = 0;
+  ha->src_port = 0;
+  ha->dst_ip_address.as_u32 = 0;
+  ha->dst_port = 0;
+
+  /* no resync running */
+  ha->in_resync = 0;
+  ha->resync_ack_count = 0;
+  ha->resync_ack_missed = 0;
+
+  vec_validate (ha->per_thread_data, tm->n_vlib_mains - 1);
+  ha->fq_index = ~0;
+
+  /* worker count comes from the "workers" thread registration, if any */
+  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+  ha->num_workers = tr ? tr->count : 0;
+
+#define _(N, s, v) ha->counters[v].name = s;          \
+  ha->counters[v].stat_segment_name = "/nat44/ha/" s; \
+  vlib_validate_simple_counter(&ha->counters[v], 0);  \
+  vlib_zero_simple_counter(&ha->counters[v], 0);
+  foreach_nat_ha_counter
+#undef _
+}
+
+/*
+ * Set the local HA listener.
+ *
+ * Unregisters a previously registered UDP port, stores the new address,
+ * port and path MTU and, when the new port is non-zero, registers it
+ * with the UDP layer.  With more than one worker the port is bound to
+ * the handoff node (creating the handoff frame queue on first use),
+ * otherwise directly to the HA node.
+ *
+ * Always returns 0.
+ */
+int
+nat_ha_set_listener (ip4_address_t * addr, u16 port, u32 path_mtu)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  vlib_main_t *vm = ha->vlib_main;
+  u32 rx_node_index;
+
+  /* unregister previously set UDP port */
+  if (ha->src_port)
+    udp_unregister_dst_port (vm, ha->src_port, 1);
+
+  ha->src_ip_address.as_u32 = addr->as_u32;
+  ha->src_port = port;
+  ha->state_sync_path_mtu = path_mtu;
+
+  /* port 0: nothing to listen on */
+  if (!port)
+    return 0;
+
+  if (ha->num_workers > 1)
+    {
+      /* if multiple worker threads first go to handoff node */
+      if (ha->fq_index == ~0)
+       ha->fq_index = vlib_frame_queue_main_init (nat_ha_node.index, 0);
+      rx_node_index = nat_ha_handoff_node.index;
+    }
+  else
+    rx_node_index = nat_ha_node.index;
+
+  udp_register_dst_port (vm, port, rx_node_index, 1);
+  nat_log_info ("HA listening on port %d for state sync", port);
+
+  return 0;
+}
+
+/*
+ * Report the current listener settings: local address, UDP port and
+ * path MTU are copied into the caller-provided locations.
+ */
+void
+nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu)
+{
+  const nat_ha_main_t *cfg = &nat_ha_main;
+
+  *path_mtu = cfg->state_sync_path_mtu;
+  *port = cfg->src_port;
+  addr->as_u32 = cfg->src_ip_address.as_u32;
+}
+
+/*
+ * Set the remote failover peer.
+ *
+ * Stores the peer address, UDP port and session refresh interval and
+ * then signals event type 1 (data 0) to the NAT HA process node.
+ *
+ * Always returns 0.
+ */
+int
+nat_ha_set_failover (ip4_address_t * addr, u16 port,
+                    u32 session_refresh_interval)
+{
+  nat_ha_main_t *cfg = &nat_ha_main;
+
+  cfg->session_refresh_interval = session_refresh_interval;
+  cfg->dst_port = port;
+  cfg->dst_ip_address.as_u32 = addr->as_u32;
+
+  /* notify the process node after the settings have been stored */
+  vlib_process_signal_event (cfg->vlib_main, nat_ha_process_node.index, 1,
+                            0);
+
+  return 0;
+}
+
+/*
+ * Report the current failover settings: peer address, UDP port and
+ * session refresh interval are copied into the caller-provided
+ * locations.
+ */
+void
+nat_ha_get_failover (ip4_address_t * addr, u16 * port,
+                    u32 * session_refresh_interval)
+{
+  const nat_ha_main_t *cfg = &nat_ha_main;
+
+  *session_refresh_interval = cfg->session_refresh_interval;
+  *port = cfg->dst_port;
+  addr->as_u32 = cfg->dst_ip_address.as_u32;
+}
+
+/*
+ * Handle a received "session add" event.
+ *
+ * Bumps the add-event-recv counter and forwards the event to the
+ * registered sadd callback.  fib_index and flags are converted from
+ * network byte order; addresses and ports are forwarded as they appear
+ * in the event.  The now argument is not used.
+ */
+static_always_inline void
+nat_ha_recv_add (nat_ha_event_t * event, f64 now, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  ip4_address_t inside, outside, ext_host, ext_host_nat;
+  u32 fib = clib_net_to_host_u32 (event->fib_index);
+  u16 ses_flags = clib_net_to_host_u16 (event->flags);
+
+  inside.as_u32 = event->in_addr;
+  outside.as_u32 = event->out_addr;
+  ext_host.as_u32 = event->eh_addr;
+  ext_host_nat.as_u32 = event->ehn_addr;
+
+  vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ADD],
+                                thread_index, 0, 1);
+
+  ha->sadd_cb (&inside, event->in_port, &outside, event->out_port,
+              &ext_host, event->eh_port, &ext_host_nat, event->ehn_port,
+              event->protocol, fib, ses_flags, thread_index);
+}
+
+/*
+ * Handle a received "session delete" event.
+ *
+ * Bumps the del-event-recv counter and forwards the event to the
+ * registered sdel callback.  fib_index is converted from network byte
+ * order; addresses and ports are forwarded as they appear in the event.
+ */
+static_always_inline void
+nat_ha_recv_del (nat_ha_event_t * event, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  ip4_address_t outside, ext_host;
+  u32 fib = clib_net_to_host_u32 (event->fib_index);
+
+  outside.as_u32 = event->out_addr;
+  ext_host.as_u32 = event->eh_addr;
+
+  vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_DEL],
+                                thread_index, 0, 1);
+
+  ha->sdel_cb (&outside, event->out_port, &ext_host, event->eh_port,
+              event->protocol, fib, thread_index);
+}
+
+/*
+ * Handle a received "session refresh" event.
+ *
+ * Bumps the refresh-event-recv counter and forwards the event to the
+ * registered sref callback.  fib_index, total_pkts and total_bytes are
+ * converted from network byte order; addresses and ports are forwarded
+ * as they appear in the event.  The now argument is not used.
+ */
+static_always_inline void
+nat_ha_recv_refresh (nat_ha_event_t * event, f64 now, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  ip4_address_t outside, ext_host;
+  u32 fib = clib_net_to_host_u32 (event->fib_index);
+  u32 pkts = clib_net_to_host_u32 (event->total_pkts);
+  u64 bytes = clib_net_to_host_u64 (event->total_bytes);
+
+  outside.as_u32 = event->out_addr;
+  ext_host.as_u32 = event->eh_addr;
+
+  vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_REFRESH],
+                                thread_index, 0, 1);
+
+  ha->sref_cb (&outside, event->out_port, &ext_host, event->eh_port,
+              event->protocol, fib, pkts, bytes, thread_index);
+}
+
+/*
+ * Process a received NAT HA event: dispatch on the event type to the
+ * add, delete or refresh handler; any other type is logged and dropped.
+ */
+static_always_inline void
+nat_ha_event_process (nat_ha_event_t * event, f64 now, u32 thread_index)
+{
+  u8 type = event->event_type;
+
+  if (type == NAT_HA_ADD)
+    nat_ha_recv_add (event, now, thread_index);
+  else if (type == NAT_HA_DEL)
+    nat_ha_recv_del (event, thread_index);
+  else if (type == NAT_HA_REFRESH)
+    nat_ha_recv_refresh (event, now, thread_index);
+  else
+    nat_log_notice ("Unsupported HA event type %d", event->event_type);
+}
+
+/*
+ * Start a new state sync packet in buffer b.
+ *
+ * Resets the buffer to hold exactly an IPv4 header, a UDP header and a
+ * NAT HA message header, fills in the addressing from the configured
+ * listener (source) and failover (destination), and writes the HA
+ * header with a zero event count, the originating thread index and a
+ * freshly drawn sequence number (both in network byte order).  IP
+ * length/checksum and UDP length are not set here.
+ *
+ * On return *offset holds the combined size of the three headers.
+ */
+static inline void
+nat_ha_header_create (vlib_buffer_t * b, u32 * offset, u32 thread_index)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  ip4_header_t *ip4;
+  udp_header_t *udp4;
+  nat_ha_message_header_t *ha_hdr;
+  u32 seq;
+
+  /* buffer metadata; current_data must be reset before locating ip4 */
+  b->current_data = 0;
+  b->current_length = sizeof (*ip4) + sizeof (*udp4) + sizeof (*ha_hdr);
+  b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+
+  ip4 = vlib_buffer_get_current (b);
+  udp4 = (udp_header_t *) (ip4 + 1);
+  ha_hdr = (nat_ha_message_header_t *) (udp4 + 1);
+
+  /* IP header */
+  ip4->ip_version_and_header_length = 0x45;
+  ip4->ttl = 254;
+  ip4->protocol = IP_PROTOCOL_UDP;
+  ip4->flags_and_fragment_offset =
+    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+  ip4->src_address.as_u32 = ha->src_ip_address.as_u32;
+  ip4->dst_address.as_u32 = ha->dst_ip_address.as_u32;
+
+  /* UDP header */
+  udp4->src_port = clib_host_to_net_u16 (ha->src_port);
+  udp4->dst_port = clib_host_to_net_u16 (ha->dst_port);
+  udp4->checksum = 0;
+
+  /* NAT HA protocol header */
+  ha_hdr->version = NAT_HA_VERSION;
+  ha_hdr->flags = 0;
+  ha_hdr->count = 0;
+  ha_hdr->thread_index = clib_host_to_net_u32 (thread_index);
+  seq = clib_atomic_fetch_add (&ha->sequence_number, 1);
+  ha_hdr->sequence_number = clib_host_to_net_u32 (seq);
+
+  /* the first event goes right behind the three headers */
+  *offset =
+    sizeof (ip4_header_t) + sizeof (udp_header_t) +
+    sizeof (nat_ha_message_header_t);
+}
+
+/*
+ * Complete the HA message held in buffer b and pass frame f to ip4-lookup.
+ *
+ * Writes the event count of the thread into the HA header, fills in the
+ * IP total length, IP header checksum and UDP length from the current
+ * buffer length, and registers the message with the resend queue before
+ * the frame is handed over.
+ *
+ * f            - frame carrying the buffer
+ * b            - buffer holding the message, positioned at the IP header
+ * is_resync    - forwarded to the resend queue
+ * thread_index - selects the per-thread state and the vlib main
+ */
+static inline void
+nat_ha_send (vlib_frame_t * f, vlib_buffer_t * b, u8 is_resync,
+             u32 thread_index)
+{
+  nat_ha_per_thread_data_t *ptd = &nat_ha_main.per_thread_data[thread_index];
+  ip4_header_t *ip4 = vlib_buffer_get_current (b);
+  udp_header_t *udp4 = ip4_next_header (ip4);
+  nat_ha_message_header_t *msg = (nat_ha_message_header_t *) (udp4 + 1);
+
+  /* number of events queued in this message */
+  msg->count = clib_host_to_net_u16 (ptd->state_sync_count);
+
+  /* lengths are only known now; the checksum must follow the IP length */
+  udp4->length = clib_host_to_net_u16 (b->current_length - sizeof (*ip4));
+  ip4->length = clib_host_to_net_u16 (b->current_length);
+  ip4->checksum = ip4_header_checksum (ip4);
+
+  /* the sequence number is passed exactly as stored in the header */
+  nat_ha_resend_queue_add (msg->sequence_number, (u8 *) ip4,
+                           b->current_length, is_resync, thread_index);
+
+  vlib_put_frame_to_node (vlib_mains[thread_index], ip4_lookup_node.index,
+                          f);
+}
+
+/* add NAT HA protocol event
+ *
+ * Appends one event to the HA message under construction for the given
+ * thread and/or sends that message.
+ *
+ * - With no message under construction a flush request returns at once;
+ *   otherwise a buffer is allocated (a failed allocation is logged and the
+ *   event is not queued).
+ * - If the thread holds no frame yet, one is obtained for ip4-lookup with
+ *   the buffer as its only vector.
+ * - The message headers are written while the message holds no event.
+ * - Unless flushing, the event is copied behind the events already queued
+ *   and the send counter matching its type is incremented.
+ * - The message is sent when flushing or when one more event would no
+ *   longer fit within state_sync_path_mtu; the per-thread state is then
+ *   reset. For resync messages resync_ack_count is incremented and
+ *   nat_ha_resync_fin () is called.
+ *
+ * event        - event to queue; not dereferenced when do_flush is set
+ *                (callers in this file pass 0 in that case)
+ * do_flush     - non-zero: queue nothing, only send what is pending
+ * thread_index - selects the per-thread state and the vlib main to use
+ * is_resync    - forwarded to nat_ha_send (); enables the resync accounting
+ */
+static_always_inline void
+nat_ha_event_add (nat_ha_event_t * event, u8 do_flush, u32 thread_index,
+                 u8 is_resync)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
+  vlib_main_t *vm = vlib_mains[thread_index];
+  vlib_buffer_t *b = 0;
+  vlib_frame_t *f;
+  u32 bi = ~0, offset;
+
+  b = td->state_sync_buffer;
+
+  if (PREDICT_FALSE (b == 0))
+    {
+      if (do_flush)
+       return;		/* nothing pending, nothing to flush */
+
+      if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+       {
+         nat_log_warn ("HA NAT state sync can't allocate buffer");
+         return;
+       }
+
+      b = td->state_sync_buffer = vlib_get_buffer (vm, bi);
+      clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+      offset = 0;
+    }
+  else
+    {
+      bi = vlib_get_buffer_index (vm, b);
+      offset = td->state_sync_next_event_offset;	/* continue after the last queued event */
+    }
+
+  f = td->state_sync_frame;
+  if (PREDICT_FALSE (f == 0))
+    {
+      u32 *to_next;
+      f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+      td->state_sync_frame = f;
+      to_next = vlib_frame_vector_args (f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+    }
+
+  if (PREDICT_FALSE (td->state_sync_count == 0))
+    nat_ha_header_create (b, &offset, thread_index);	/* sets offset to the end of the headers */
+
+  if (PREDICT_TRUE (do_flush == 0))
+    {
+      clib_memcpy_fast (b->data + offset, event, sizeof (*event));
+      offset += sizeof (*event);
+      td->state_sync_count++;
+      b->current_length += sizeof (*event);
+
+      switch (event->event_type)
+       {
+       case NAT_HA_ADD:
+         vlib_increment_simple_counter (&ha->counters
+                                        [NAT_HA_COUNTER_SEND_ADD],
+                                        thread_index, 0, 1);
+         break;
+       case NAT_HA_DEL:
+         vlib_increment_simple_counter (&ha->counters
+                                        [NAT_HA_COUNTER_SEND_DEL],
+                                        thread_index, 0, 1);
+         break;
+       case NAT_HA_REFRESH:
+         vlib_increment_simple_counter (&ha->counters
+                                        [NAT_HA_COUNTER_SEND_REFRESH],
+                                        thread_index, 0, 1);
+         break;
+       default:
+         break;
+       }
+    }
+
+  if (PREDICT_FALSE
+      (do_flush || offset + (sizeof (*event)) > ha->state_sync_path_mtu))
+    {
+      nat_ha_send (f, b, is_resync, thread_index);
+      td->state_sync_buffer = 0;
+      td->state_sync_frame = 0;
+      td->state_sync_count = 0;
+      offset = 0;
+      if (is_resync)
+       {
+         clib_atomic_fetch_add (&ha->resync_ack_count, 1);
+         nat_ha_resync_fin ();
+       }
+    }
+
+  td->state_sync_next_event_offset = offset;
+}
+
+#define skip_if_disabled()          \
+do {                                \
+  nat_ha_main_t *ha = &nat_ha_main; \
+  if (PREDICT_TRUE (!ha->dst_port)) \
+    return;                         \
+} while (0)
+
+void
+nat_ha_flush (u8 is_resync)
+{
+  skip_if_disabled ();
+  nat_ha_event_add (0, 1, 0, is_resync);
+}
+
+/*
+ * Queue a "session add" event for the failover node.
+ *
+ * Does nothing when HA is disabled. flags and fib_index are converted to
+ * network byte order; addresses and ports are stored exactly as passed.
+ *
+ * in_addr/in_port   - inside address and port
+ * out_addr/out_port - outside address and port
+ * eh_addr/eh_port   - external host address and port
+ * ehn_addr/ehn_port - external host address and port after translation
+ * proto             - L4 protocol
+ * fib_index         - FIB index
+ * flags             - session flags
+ * thread_index      - thread whose HA message the event is appended to
+ * is_resync         - forwarded to nat_ha_event_add ()
+ */
+void
+nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, ip4_address_t * out_addr,
+             u16 out_port, ip4_address_t * eh_addr, u16 eh_port,
+             ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index,
+             u16 flags, u32 thread_index, u8 is_resync)
+{
+  nat_ha_event_t ev;
+
+  skip_if_disabled ();
+
+  /* start from an all-zero event so unused fields are defined */
+  clib_memset (&ev, 0, sizeof (ev));
+
+  ev.event_type = NAT_HA_ADD;
+  ev.protocol = proto;
+  ev.fib_index = clib_host_to_net_u32 (fib_index);
+  ev.flags = clib_host_to_net_u16 (flags);
+
+  /* inside / outside endpoints */
+  ev.in_addr = in_addr->as_u32;
+  ev.in_port = in_port;
+  ev.out_addr = out_addr->as_u32;
+  ev.out_port = out_port;
+
+  /* external host, before and after translation */
+  ev.eh_addr = eh_addr->as_u32;
+  ev.eh_port = eh_port;
+  ev.ehn_addr = ehn_addr->as_u32;
+  ev.ehn_port = ehn_port;
+
+  nat_ha_event_add (&ev, 0, thread_index, is_resync);
+}
+
+/*
+ * Queue a "session delete" event for the failover node.
+ *
+ * Does nothing when HA is disabled. fib_index is converted to network byte
+ * order; addresses and ports are stored exactly as passed. The event is
+ * never marked as a resync event.
+ *
+ * out_addr/out_port - outside address and port
+ * eh_addr/eh_port   - external host address and port
+ * proto             - L4 protocol
+ * fib_index         - FIB index
+ * thread_index      - thread whose HA message the event is appended to
+ */
+void
+nat_ha_sdel (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
+             u16 eh_port, u8 proto, u32 fib_index, u32 thread_index)
+{
+  nat_ha_event_t ev;
+
+  skip_if_disabled ();
+
+  /* start from an all-zero event so unused fields are defined */
+  clib_memset (&ev, 0, sizeof (ev));
+
+  ev.event_type = NAT_HA_DEL;
+  ev.protocol = proto;
+  ev.fib_index = clib_host_to_net_u32 (fib_index);
+
+  ev.out_addr = out_addr->as_u32;
+  ev.out_port = out_port;
+  ev.eh_addr = eh_addr->as_u32;
+  ev.eh_port = eh_port;
+
+  nat_ha_event_add (&ev, 0, thread_index, 0);
+}
+
+/*
+ * Queue a "session refresh" event carrying the session counters.
+ *
+ * Does nothing when HA is disabled, or while less than
+ * session_refresh_interval has passed since *last_refreshed. When an event
+ * is queued, *last_refreshed is set to now. fib_index and the counters are
+ * converted to network byte order; addresses and ports are stored exactly
+ * as passed.
+ *
+ * out_addr/out_port - outside address and port
+ * eh_addr/eh_port   - external host address and port
+ * proto             - L4 protocol
+ * fib_index         - FIB index
+ * total_pkts        - packet counter of the session
+ * total_bytes       - byte counter of the session
+ * thread_index      - thread whose HA message the event is appended to
+ * last_refreshed    - in/out: time the session was last refreshed
+ * now               - current time
+ */
+void
+nat_ha_sref (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
+             u16 eh_port, u8 proto, u32 fib_index, u32 total_pkts,
+             u64 total_bytes, u32 thread_index, f64 * last_refreshed, f64 now)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  nat_ha_event_t ev;
+
+  skip_if_disabled ();
+
+  /* rate limit: the refresh interval has not elapsed yet */
+  if (now < (*last_refreshed + ha->session_refresh_interval))
+    return;
+  *last_refreshed = now;
+
+  /* start from an all-zero event so unused fields are defined */
+  clib_memset (&ev, 0, sizeof (ev));
+
+  ev.event_type = NAT_HA_REFRESH;
+  ev.protocol = proto;
+  ev.fib_index = clib_host_to_net_u32 (fib_index);
+
+  ev.out_addr = out_addr->as_u32;
+  ev.out_port = out_port;
+  ev.eh_addr = eh_addr->as_u32;
+  ev.eh_port = eh_port;
+
+  ev.total_pkts = clib_host_to_net_u32 (total_pkts);
+  ev.total_bytes = clib_host_to_net_u64 (total_bytes);
+
+  nat_ha_event_add (&ev, 0, thread_index, 0);
+}
+
+/* per thread process waiting for interrupt
+ *
+ * Function of the "nat-ha-worker" input node, which is registered below in
+ * interrupt state. Each run works on the state of the calling thread
+ * (vm->thread_index): it sends the HA message under construction, if any,
+ * and then scans for data that has to be resent. rt and f are not used.
+ * Always returns 0.
+ */
+static uword
+nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
+                 vlib_frame_t * f)
+{
+  u32 thread_index = vm->thread_index;
+  /* flush HA NAT data under construction */
+  nat_ha_event_add (0, 1, thread_index, 0);
+  /* scan if we need to resend some non-ACKed data */
+  nat_ha_resend_scan (vlib_time_now (vm), thread_index);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat_ha_worker_node) = {
+    .function = nat_ha_worker_fn,
+    .type = VLIB_NODE_TYPE_INPUT,
+    .state = VLIB_NODE_STATE_INTERRUPT,
+    .name = "nat-ha-worker",
+};
+/* *INDENT-ON* */
+
+/* periodically send interrupt to each thread
+ *
+ * Function of the "nat-ha-process" process node registered below. It first
+ * blocks until an initial event arrives; a non-zero event type is only
+ * reported with an info-level log message. After that it loops forever:
+ * it waits for an event or for 1.0 second, discards any event data, and
+ * marks the nat-ha-worker node interrupt-pending on every thread that has
+ * an entry in ha->per_thread_data. rt and f are not used; the return
+ * statement after the loop is never reached.
+ */
+static uword
+nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  uword event_type;
+  uword *event_data = 0;
+  u32 ti;
+
+  vlib_process_wait_for_event (vm);
+  event_type = vlib_process_get_events (vm, &event_data);
+  if (event_type)
+    nat_log_info ("nat-ha-process: bogus kickoff event received");
+  vec_reset_length (event_data);
+
+  while (1)
+    {
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, &event_data);
+      vec_reset_length (event_data);
+      for (ti = 0; ti < vec_len (vlib_mains); ti++)
+       {
+         if (ti >= vec_len (ha->per_thread_data))
+           continue;
+
+         vlib_node_set_interrupt_pending (vlib_mains[ti],
+                                          nat_ha_worker_node.index);
+       }
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat_ha_process_node) = {
+    .function = nat_ha_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "nat-ha-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * Report the resync state kept in nat_ha_main.
+ *
+ * in_resync         - out: value of the in_resync flag
+ * resync_ack_missed - out: value of the resync_ack_missed counter
+ */
+void
+nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
+{
+  *in_resync = nat_ha_main.in_resync;
+  *resync_ack_missed = nat_ha_main.resync_ack_missed;
+}
+
+/*
+ * Resync HA: resend every existing session to the failover node.
+ *
+ * Defined under the name nat_ha_resync, which is the name nat_ha.h
+ * declares with this exact signature.
+ *
+ * Records the requester (client_index, pid, event_callback) and resets the
+ * resync counters, queues a resync "session add" event for every session
+ * of every thread, then flushes the pending message.
+ *
+ * NOTE(review): nothing in this function clears in_resync; presumably
+ * nat_ha_resync_fin () does so once the resync messages are accounted for.
+ * Confirm what happens when HA is disabled or no session exists, since no
+ * message is sent in either case.
+ *
+ * Returns 0 on success, VNET_API_ERROR_IN_PROGRESS while a previous resync
+ * is still marked as running.
+ */
+int
+nat_ha_resync (u32 client_index, u32 pid,
+               nat_ha_resync_event_cb_t event_callback)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  snat_main_t *sm = &snat_main;
+  snat_session_t *ses;
+  snat_main_per_thread_data_t *tsm;
+
+  if (ha->in_resync)
+    return VNET_API_ERROR_IN_PROGRESS;
+
+  ha->in_resync = 1;
+  ha->resync_ack_count = 0;
+  ha->resync_ack_missed = 0;
+  ha->event_callback = event_callback;
+  ha->client_index = client_index;
+  ha->pid = pid;
+
+  /* Sessions of all threads are queued on the HA state of thread 0, which
+   * is also the state nat_ha_flush () sends. */
+  /* *INDENT-OFF* */
+  vec_foreach (tsm, sm->per_thread_data)
+    {
+      pool_foreach (ses, tsm->sessions, ({
+        nat_ha_sadd (&ses->in2out.addr, ses->in2out.port,
+                     &ses->out2in.addr, ses->out2in.port,
+                     &ses->ext_host_addr, ses->ext_host_port,
+                     &ses->ext_host_nat_addr, ses->ext_host_nat_port,
+                     ses->in2out.protocol, ses->in2out.fib_index,
+                     ses->flags, 0, 1);
+      }));
+    }
+  /* *INDENT-ON* */
+
+  nat_ha_flush (1);
+
+  return 0;
+}
+
+/*
+ * Previous name of nat_ha_resync (), kept so that any caller still using it
+ * continues to link. Same parameters and return value.
+ */
+int
+nat44_ha_resync (u32 client_index, u32 pid,
+                 nat_ha_resync_event_cb_t event_callback)
+{
+  return nat_ha_resync (client_index, pid, event_callback);
+}
+
+/* Trace record stored by the nat-ha node */
+typedef struct
+{
+  ip4_address_t addr;           /* printed as the address the events came from */
+  u32 event_count;              /* printed as the number of events */
+} nat_ha_trace_t;
+
+/*
+ * Trace formatter of the nat-ha node.
+ *
+ * Appends "nat-ha: <count> events from <address>" to s and returns the
+ * resulting vector. args holds the vlib main, the node and the
+ * nat_ha_trace_t record, in that order; only the record is used.
+ */
+static u8 *
+format_nat_ha_trace (u8 * s, va_list * args)
+{
+  nat_ha_trace_t *rec;
+
+  /* the first two arguments have to be consumed to reach the record */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, nat_ha_trace_t *);
+
+  return format (s, "nat-ha: %u events from %U", rec->event_count,
+                 format_ip4_address, &rec->addr);
+}
+
+/* next nodes of the nat-ha node */
+typedef enum
+{
+  NAT_HA_NEXT_IP4_LOOKUP,
+  NAT_HA_NEXT_DROP,
+  NAT_HA_N_NEXT,
+} nat_ha_next_t;
+
+/* MALFORMED is appended so the values of the existing errors do not change */
+#define foreach_nat_ha_error      \
+_(PROCESSED, "pkts-processed")    \
+_(BAD_VERSION, "bad-version")     \
+_(MALFORMED, "malformed-message")
+
+typedef enum
+{
+#define _(sym, str) NAT_HA_ERROR_##sym,
+  foreach_nat_ha_error
+#undef _
+    NAT_HA_N_ERROR,
+} nat_ha_error_t;
+
+static char *nat_ha_error_strings[] = {
+#define _(sym, str) str,
+  foreach_nat_ha_error
+#undef _
+};
+
+/* process received HA NAT protocol messages
+ *
+ * Each buffer is expected to be positioned at the NAT HA header, directly
+ * preceded by the UDP and (20 byte) IPv4 headers; the buffer is rewound to
+ * the IPv4 header before it leaves the node.
+ *
+ * - Messages shorter than the HA header, or announcing more events than
+ *   the buffer holds, are dropped as malformed. The event count comes from
+ *   the wire and must not be trusted to stay inside the received data.
+ * - Messages with an unexpected version are dropped.
+ * - A message with no events and the ACK flag acknowledges data sent
+ *   earlier; it is handed to nat_ha_ack_recv () and dropped.
+ * - Any other message has its events processed one by one and is then
+ *   turned into an ACK in place (addresses and ports swapped, lengths, TTL
+ *   and IP checksum updated) and sent to ip4-lookup.
+ *
+ * The trace record reports the received message: event count and sender
+ * address are captured before the buffer is rewritten into the ACK.
+ *
+ * Returns the number of buffers in the frame.
+ */
+static uword
+nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, next_index, *to_next;
+  f64 now = vlib_time_now (vm);
+  u32 thread_index = vm->thread_index;
+  u32 pkts_processed = 0;
+  ip4_main_t *i4m = &ip4_main;
+  u8 host_config_ttl = i4m->host_config.ttl;
+  nat_ha_main_t *ha = &nat_ha_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0, next0, src_addr0, dst_addr0, msg_len0;
+          vlib_buffer_t *b0;
+          nat_ha_message_header_t *h0;
+          nat_ha_event_t *e0;
+          u16 event_count0, n_events0, src_port0, dst_port0, old_len0;
+          ip4_header_t *ip0;
+          udp_header_t *udp0;
+          ip_csum_t sum0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          h0 = vlib_buffer_get_current (b0);
+          /* bytes available in this buffer from the HA header onwards */
+          msg_len0 = b0->current_length;
+          vlib_buffer_advance (b0, -sizeof (*udp0));
+          udp0 = vlib_buffer_get_current (b0);
+          vlib_buffer_advance (b0, -sizeof (*ip0));
+          ip0 = vlib_buffer_get_current (b0);
+
+          next0 = NAT_HA_NEXT_DROP;
+          n_events0 = 0;
+          /* sender of the message, needed for the ACK and for the trace */
+          src_addr0 = ip0->src_address.data_u32;
+
+          if (PREDICT_FALSE (msg_len0 < sizeof (*h0)))
+            {
+              b0->error = node->errors[NAT_HA_ERROR_MALFORMED];
+              goto done0;
+            }
+
+          event_count0 = n_events0 = clib_net_to_host_u16 (h0->count);
+
+          if (h0->version != NAT_HA_VERSION)
+            {
+              b0->error = node->errors[NAT_HA_ERROR_BAD_VERSION];
+              goto done0;
+            }
+
+          /* ACK for previously send data */
+          if (!event_count0 && (h0->flags & NAT_HA_FLAG_ACK))
+            {
+              nat_ha_ack_recv (h0->sequence_number, thread_index);
+              b0->error = node->errors[NAT_HA_ERROR_PROCESSED];
+              goto done0;
+            }
+
+          /* every announced event must lie within the received data */
+          if (PREDICT_FALSE
+              (sizeof (*h0) + event_count0 * sizeof (nat_ha_event_t) >
+               msg_len0))
+            {
+              b0->error = node->errors[NAT_HA_ERROR_MALFORMED];
+              goto done0;
+            }
+
+          e0 = (nat_ha_event_t *) (h0 + 1);
+
+          /* process each event */
+          while (event_count0)
+            {
+              nat_ha_event_process (e0, now, thread_index);
+              event_count0--;
+              e0 = (nat_ha_event_t *) ((u8 *) e0 + sizeof (nat_ha_event_t));
+            }
+
+          next0 = NAT_HA_NEXT_IP4_LOOKUP;
+          pkts_processed++;
+
+          /* reply with ACK: headers only, the events are cut off */
+          b0->current_length = sizeof (*ip0) + sizeof (*udp0) + sizeof (*h0);
+
+          dst_addr0 = ip0->dst_address.data_u32;
+          ip0->src_address.data_u32 = dst_addr0;
+          ip0->dst_address.data_u32 = src_addr0;
+          old_len0 = ip0->length;
+          ip0->length = clib_host_to_net_u16 (b0->current_length);
+
+          /* incremental checksum update for the TTL and length changes;
+           * swapping the two addresses leaves the sum unchanged */
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+                                 ip4_header_t, ttl);
+          ip0->ttl = host_config_ttl;
+          sum0 =
+            ip_csum_update (sum0, old_len0, ip0->length, ip4_header_t,
+                            length);
+          ip0->checksum = ip_csum_fold (sum0);
+
+          udp0->checksum = 0;
+          src_port0 = udp0->src_port;
+          dst_port0 = udp0->dst_port;
+          udp0->src_port = dst_port0;
+          udp0->dst_port = src_port0;
+          udp0->length =
+            clib_host_to_net_u16 (b0->current_length - sizeof (*ip0));
+
+          /* the sequence number is kept, it identifies the ACKed message */
+          h0->flags = NAT_HA_FLAG_ACK;
+          h0->count = 0;
+          vlib_increment_simple_counter (&ha->counters
+                                         [NAT_HA_COUNTER_SEND_ACK],
+                                         thread_index, 0, 1);
+
+        done0:
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat_ha_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              /* values of the received message, not of the ACK built above */
+              t->event_count = n_events0;
+              t->addr.as_u32 = src_addr0;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, nat_ha_node.index,
+                               NAT_HA_ERROR_PROCESSED, pkts_processed);
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat_ha_node) = {
+  .function = nat_ha_node_fn,
+  .name = "nat-ha",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat_ha_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (nat_ha_error_strings),
+  .error_strings = nat_ha_error_strings,
+  .n_next_nodes = NAT_HA_N_NEXT,
+  .next_nodes = {
+     [NAT_HA_NEXT_IP4_LOOKUP] = "ip4-lookup",
+     [NAT_HA_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/* Trace record stored by the nat-ha-handoff node */
+typedef struct
+{
+  u32 next_worker_index;        /* thread the message was handed to */
+  u8 in2out;                    /* not written or printed by the code below */
+} nat_ha_handoff_trace_t;
+
+/* counters of the nat-ha-handoff node */
+#define foreach_nat_ha_handoff_error  \
+_(CONGESTION_DROP, "congestion drop") \
+_(SAME_WORKER, "same worker")         \
+_(DO_HANDOFF, "do handoff")
+
+typedef enum
+{
+#define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym,
+  foreach_nat_ha_handoff_error
+#undef _
+    NAT44_HANDOFF_N_ERROR,
+} nat_ha_handoff_error_t;
+
+static char *nat_ha_handoff_error_strings[] = {
+#define _(sym,string) string,
+  foreach_nat_ha_handoff_error
+#undef _
+};
+
+/*
+ * Trace formatter of the nat-ha-handoff node.
+ *
+ * Appends "NAT_HA_WORKER_HANDOFF: next-worker <index>" to s and returns
+ * the resulting vector. args holds the vlib main, the node and the
+ * nat_ha_handoff_trace_t record, in that order; only the record is used.
+ */
+static u8 *
+format_nat_ha_handoff_trace (u8 * s, va_list * args)
+{
+  nat_ha_handoff_trace_t *rec;
+
+  /* the first two arguments have to be consumed to reach the record */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  rec = va_arg (*args, nat_ha_handoff_trace_t *);
+
+  return format (s, "NAT_HA_WORKER_HANDOFF: next-worker %d",
+                 rec->next_worker_index);
+}
+
+/* do worker handoff based on thread_index in NAT HA protocol header
+ *
+ * Each buffer is expected to be positioned at the NAT HA header. The
+ * thread index found there selects the thread the buffer is enqueued to
+ * through the frame queue ha->fq_index.
+ *
+ * The thread index comes from the wire, so it is validated first: a value
+ * that does not name an existing thread (for example sent by a peer
+ * running more threads than this node) would otherwise be used, truncated
+ * to 16 bits, as the enqueue target. Such messages are kept on the current
+ * thread and counted as "same worker".
+ *
+ * Buffers that could not be enqueued are counted as congestion drops.
+ * Returns the number of buffers in the frame.
+ */
+static uword
+nat_ha_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        vlib_frame_t * frame)
+{
+  nat_ha_main_t *ha = &nat_ha_main;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u32 n_enq, n_left_from, *from;
+  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
+  u32 thread_index = vm->thread_index;
+  u32 do_handoff = 0, same_worker = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
+
+  b = bufs;
+  ti = thread_indices;
+
+  while (n_left_from > 0)
+    {
+      nat_ha_message_header_t *h0;
+      u32 next_thread_index0;
+
+      h0 = vlib_buffer_get_current (b[0]);
+      next_thread_index0 = clib_net_to_host_u32 (h0->thread_index);
+
+      /* never hand off to a thread that does not exist */
+      if (PREDICT_FALSE (next_thread_index0 >= vec_len (vlib_mains)))
+        next_thread_index0 = thread_index;
+      ti[0] = next_thread_index0;
+
+      if (ti[0] != thread_index)
+        do_handoff++;
+      else
+        same_worker++;
+
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                         && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+        {
+          nat_ha_handoff_trace_t *t =
+            vlib_add_trace (vm, node, b[0], sizeof (*t));
+          t->next_worker_index = ti[0];
+        }
+
+      n_left_from -= 1;
+      ti += 1;
+      b += 1;
+    }
+
+  n_enq =
+    vlib_buffer_enqueue_to_thread (vm, ha->fq_index, from, thread_indices,
+                                   frame->n_vectors, 1);
+
+  if (n_enq < frame->n_vectors)
+    vlib_node_increment_counter (vm, node->node_index,
+                                 NAT_HA_HANDOFF_ERROR_CONGESTION_DROP,
+                                 frame->n_vectors - n_enq);
+  vlib_node_increment_counter (vm, node->node_index,
+                               NAT_HA_HANDOFF_ERROR_SAME_WORKER, same_worker);
+  vlib_node_increment_counter (vm, node->node_index,
+                               NAT_HA_HANDOFF_ERROR_DO_HANDOFF, do_handoff);
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
+  .function = nat_ha_handoff_node_fn,
+  .name = "nat-ha-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nat_ha_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(nat_ha_handoff_error_strings),
+  .error_strings = nat_ha_handoff_error_strings,
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_ha.h b/src/plugins/nat/nat_ha.h
new file mode 100644 (file)
index 0000000..ec62789
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT active-passive HA
+ */
+
+#ifndef __included_nat_ha_h__
+#define __included_nat_ha_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+/* Call back functions for received HA events on passive/failover */
+typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port,
+                                 ip4_address_t * out_addr, u16 out_port,
+                                 ip4_address_t * eh_addr, u16 eh_port,
+                                 ip4_address_t * ehn_addr, u16 ehn_port,
+                                 u8 proto, u32 fib_index, u16 flags,
+                                 u32 thread_index);
+typedef void (*nat_ha_sdel_cb_t) (ip4_address_t * out_addr, u16 out_port,
+                                 ip4_address_t * eh_addr, u16 eh_port,
+                                 u8 proto, u32 fib_index, u32 thread_index);
+typedef void (*nat_ha_sref_cb_t) (ip4_address_t * out_addr, u16 out_port,
+                                 ip4_address_t * eh_addr, u16 eh_port,
+                                 u8 proto, u32 fib_index, u32 total_pkts,
+                                 u64 total_bytes, u32 thread_index);
+
+/**
+ * @brief Initialize NAT HA
+ */
+void nat_ha_init (vlib_main_t * vm, nat_ha_sadd_cb_t sadd_cb,
+                 nat_ha_sdel_cb_t sdel_cb, nat_ha_sref_cb_t sref_cb);
+
+/**
+ * @brief Set HA listener (local settings)
+ *
+ * @param addr local IP4 address
+ * @param port local UDP port number
+ * @param path_mtu path MTU between local and failover
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ha_set_listener (ip4_address_t * addr, u16 port, u32 path_mtu);
+
+/**
+ * @brief Get HA listener/local configuration
+ */
+void nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu);
+
+/**
+ * @brief Set HA failover (remote settings)
+ *
+ * @param addr failover IP4 address
+ * @param port failover UDP port number
+ * @param session_refresh_interval number of seconds after which to send
+ *                                 session counters refresh
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+int nat_ha_set_failover (ip4_address_t * addr, u16 port,
+                        u32 session_refresh_interval);
+
+/**
+ * @brief Get HA failover/remote settings
+ */
+void nat_ha_get_failover (ip4_address_t * addr, u16 * port,
+                         u32 * session_refresh_interval);
+
+/**
+ * @brief Create session add HA event
+ *
+ * @param in_addr inside IPv4 address
+ * @param in_port inside L4 port number
+ * @param out_addr outside IPv4 address
+ * @param out_port outside L4 port number
+ * @param eh_addr external host IPv4 address
+ * @param eh_port external host L4 port number
+ * @param ehn_addr external host IPv4 address after translation
+ * @param ehn_port external host L4 port number after translation
+ * @param proto L4 protocol
+ * @param fib_index fib index
+ * @param flags session flags
+ * @param thread_index thread index
+ * @param is_resync 1 if HA resync
+ */
+void nat_ha_sadd (ip4_address_t * in_addr, u16 in_port,
+                 ip4_address_t * out_addr, u16 out_port,
+                 ip4_address_t * eh_addr, u16 eh_port,
+                 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto,
+                 u32 fib_index, u16 flags, u32 thread_index, u8 is_resync);
+
+/**
+ * @brief Create session delete HA event
+ *
+ * @param out_addr outside IPv4 address
+ * @param out_port outside L4 port number
+ * @param eh_addr external host IPv4 address
+ * @param eh_port external host L4 port number
+ * @param proto L4 protocol
+ * @param fib_index fib index
+ * @param thread_index thread index
+ */
+void nat_ha_sdel (ip4_address_t * out_addr, u16 out_port,
+                 ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+                 u32 fib_index, u32 thread_index);
+
+/**
+ * @brief Create session refresh HA event
+ *
+ * @param out_addr outside IPv4 address
+ * @param out_port outside L4 port number
+ * @param eh_addr external host IPv4 address
+ * @param eh_port external host L4 port number
+ * @param proto L4 protocol
+ * @param fib_index fib index
+ * @param total_pkts total packets processed
+ * @param total_bytes total bytes processed
+ * @param thread_index thread index
+ * @param last_refreshed last session refresh time
+ * @param now current time
+ */
+void nat_ha_sref (ip4_address_t * out_addr, u16 out_port,
+                 ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+                 u32 fib_index, u32 total_pkts, u64 total_bytes,
+                 u32 thread_index, f64 * last_refreshed, f64 now);
+
+/**
+ * @brief Flush the current HA data (for testing)
+ */
+void nat_ha_flush (u8 is_resync);
+
+typedef void (*nat_ha_resync_event_cb_t) (u32 client_index, u32 pid,
+                                         u32 missed_count);
+
+/**
+ * @brief Resync HA (resend existing sessions to new failover)
+ */
+int nat_ha_resync (u32 client_index, u32 pid,
+                  nat_ha_resync_event_cb_t event_callback);
+
+/**
+ * @brief Get resync status
+ *
+ * @param in_resync 1 if resync in progress
+ * @param resync_ack_missed number of missed (not ACKed) messages
+ */
+void nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed);
+
+#endif /* __included_nat_ha_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_ha_doc.md b/src/plugins/nat/nat_ha_doc.md
new file mode 100644 (file)
index 0000000..82f7f28
--- /dev/null
@@ -0,0 +1,68 @@
+# Active-Passive NAT HA
+
+## Introduction
+
+One NAT node actively manages traffic while the other stays synchronized and ready to transition to the active state. When a failure occurs, the passive node takes over seamlessly and enforces the same NAT sessions. Both nodes share the same configuration settings.
+
+## Configuration
+
+### NAT HA protocol
+Session synchronization traffic is distributed through an IPv4 UDP connection. The active node sends NAT HA protocol events to the passive node. To achieve reliable transfer, the NAT HA protocol uses acknowledgement with re-transmission. This requires the passive node to respond with an acknowledgement message as it receives the data. The active node keeps a record of each packet it sends and maintains a timer from when the packet was sent. The active node re-transmits a packet if the timer expires before receiving the acknowledgement.
+
+### Topology
+
+The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to synchronize NAT sessions using NAT HA protocol.
+
+        +-----------------------+
+        |    outside network    |
+        +-----------------------+
+         /                     \
+        /                       \
+       /                         \
+      /                           \
+     /                             \
++---------+                   +---------+
+| GE0/0/1 | Active    Passive | GE0/0/1 |
+|         |                   |         |
+|  GE0/0/3|-------------------|GE0/0/3  |
+|         |   sync network    |         |
+| GE0/0/0 |                   | GE0/0/0 |
++---------+                   +---------+
+     \                             /
+      \                           /
+       \                         /
+        \                       /
+         \                     /
+        +-----------------------+
+        |    inside network     |
+        +-----------------------+
+
+### Active node configuration
+
+```
+set interface ip address GigabitEthernet0/0/1 10.15.7.101/24
+set interface ip address GigabitEthernet0/0/0 172.16.10.101/24
+set interface ip address GigabitEthernet0/0/3 10.0.0.1/24
+set interface state GigabitEthernet0/0/0 up
+set interface state GigabitEthernet0/0/1 up
+set interface state GigabitEthernet0/0/3 up
+set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+nat44 add address 10.15.7.100
+nat ha listener 10.0.0.1:1234
+nat ha failover 10.0.0.2:2345
+```
+
+### Passive node configuration
+
+```
+set interface ip address GigabitEthernet0/0/1 10.15.7.102/24
+set interface ip address GigabitEthernet0/0/0 172.16.10.102/24
+set interface ip address GigabitEthernet0/0/3 10.0.0.2/24
+set interface state GigabitEthernet0/0/0 up
+set interface state GigabitEthernet0/0/1 up
+set interface state GigabitEthernet0/0/3 up
+set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+nat44 add address 10.15.7.100
+nat ha listener 10.0.0.2:2345
+```
+
index db5063c..64b1f25 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <vnet/fib/ip4_fib.h>
 #include <nat/nat.h>
+#include <nat/nat_ha.h>
 
 always_inline u32
 ip_proto_to_snat_proto (u8 ip_proto)
@@ -228,7 +229,7 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses,
     {
       nat_log_debug ("TCP close connection %U", format_snat_session,
                     &sm->per_thread_data[thread_index], ses);
-      nat_free_session_data (sm, ses, thread_index);
+      nat_free_session_data (sm, ses, thread_index, 0);
       nat44_delete_session (sm, ses, thread_index);
       return 1;
     }
@@ -262,7 +263,7 @@ nat44_set_tcp_session_state_o2i (snat_main_t * sm, snat_session_t * ses,
     {
       nat_log_debug ("TCP close connection %U", format_snat_session,
                     &sm->per_thread_data[thread_index], ses);
-      nat_free_session_data (sm, ses, thread_index);
+      nat_free_session_data (sm, ses, thread_index, 0);
       nat44_delete_session (sm, ses, thread_index);
       return 1;
     }
@@ -293,11 +294,16 @@ nat44_session_get_timeout (snat_main_t * sm, snat_session_t * s)
 }
 
 always_inline void
-nat44_session_update_counters (snat_session_t * s, f64 now, uword bytes)
+nat44_session_update_counters (snat_session_t * s, f64 now, uword bytes,
+                              u32 thread_index)
 {
   s->last_heard = now;
   s->total_pkts++;
   s->total_bytes += bytes;
+  nat_ha_sref (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+              s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+              s->total_pkts, s->total_bytes, thread_index,
+              &s->ha_last_refreshed, now);
 }
 
 /** \brief Per-user LRU list maintenance */
index b743c08..3d045a9 100755 (executable)
@@ -30,6 +30,7 @@
 #include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_syslog.h>
+#include <nat/nat_ha.h>
 
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
@@ -143,6 +144,10 @@ nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
                               &s->out2in.addr, s->out2in.port,
                               s->in2out.protocol);
 
+      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+                  s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+                  ctx->thread_index);
+
       if (!snat_is_session_static (s))
        snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
                                            &s->out2in);
@@ -202,7 +207,7 @@ create_session_for_static_mapping (snat_main_t * sm,
       return 0;
     }
 
-  s = nat_session_alloc_or_recycle (sm, u, thread_index);
+  s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
   if (!s)
     {
       nat44_delete_user_with_no_session (sm, u, thread_index);
@@ -247,6 +252,12 @@ create_session_for_static_mapping (snat_main_t * sm,
                           &s->in2out.addr, s->in2out.port, &s->out2in.addr,
                           s->out2in.port, s->in2out.protocol);
 
+  nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
+              s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+              &s->ext_host_nat_addr, s->ext_host_nat_port,
+              s->in2out.protocol, s->in2out.fib_index, s->flags,
+              thread_index, 0);
+
   return s;
 }
 
@@ -653,7 +664,7 @@ icmp_out2in_slow_path (snat_main_t * sm,
       /* Accounting */
       nat44_session_update_counters (s0, now,
                                     vlib_buffer_length_in_chain
-                                    (sm->vlib_main, b0));
+                                    (sm->vlib_main, b0), thread_index);
       /* Per-user LRU list maintenance */
       nat44_session_update_lru (sm, s0, thread_index);
     }
@@ -909,8 +920,8 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
        trace0:
@@ -1075,8 +1086,8 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s1, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b1));
+                                        vlib_buffer_length_in_chain (vm, b1),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s1, thread_index);
        trace1:
@@ -1278,8 +1289,8 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
        trace00:
@@ -1588,8 +1599,8 @@ VLIB_NODE_FN (nat44_out2in_reass_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
index 88b248f..41f9bfe 100644 (file)
@@ -30,6 +30,7 @@
 #include <nat/nat_reass.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat_syslog.h>
+#include <nat/nat_ha.h>
 
 #define foreach_nat_out2in_ed_error                     \
 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
@@ -116,7 +117,7 @@ icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
       /* Accounting */
       nat44_session_update_counters (s0, now,
                                     vlib_buffer_length_in_chain
-                                    (sm->vlib_main, b0));
+                                    (sm->vlib_main, b0), thread_index);
       /* Per-user LRU list maintenance */
       nat44_session_update_lru (sm, s0, thread_index);
     }
@@ -186,6 +187,10 @@ nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
                             &s->ext_host_addr, s->ext_host_port,
                             s->in2out.protocol, is_twice_nat_session (s));
 
+      nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
+                  s->ext_host_port, s->out2in.protocol, s->out2in.fib_index,
+                  ctx->thread_index);
+
       if (is_twice_nat_session (s))
        {
          for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
@@ -324,13 +329,19 @@ create_session_for_static_mapping_ed (snat_main_t * sm,
                                       s->in2out.port,
                                       s->out2in.port, s->in2out.fib_index);
 
-  nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
+  nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
                         &s->in2out.addr, s->in2out.port,
                         &s->ext_host_nat_addr, s->ext_host_nat_port,
                         &s->out2in.addr, s->out2in.port,
                         &s->ext_host_addr, s->ext_host_port,
                         s->in2out.protocol, is_twice_nat_session (s));
 
+  nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
+              s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+              &s->ext_host_nat_addr, s->ext_host_nat_port,
+              s->in2out.protocol, s->in2out.fib_index, s->flags,
+              thread_index, 0);
+
   return s;
 }
 
@@ -483,7 +494,7 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
     }
 
   /* Accounting */
-  nat44_session_update_counters (s, now, 0);
+  nat44_session_update_counters (s, now, 0, thread_index);
   /* Per-user LRU list maintenance */
   nat44_session_update_lru (sm, s, thread_index);
 }
@@ -703,7 +714,8 @@ nat44_ed_out2in_unknown_proto (snat_main_t * sm,
   vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
 
   /* Accounting */
-  nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b));
+  nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
+                                thread_index);
   /* Per-user LRU list maintenance */
   nat44_session_update_lru (sm, s, thread_index);
 
@@ -996,8 +1008,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -1230,8 +1242,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s1, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b1));
+                                        vlib_buffer_length_in_chain (vm, b1),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s1, thread_index);
 
@@ -1498,8 +1510,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
@@ -1884,8 +1896,8 @@ VLIB_NODE_FN (nat44_ed_out2in_reass_node) (vlib_main_t * vm,
 
          /* Accounting */
          nat44_session_update_counters (s0, now,
-                                        vlib_buffer_length_in_chain (vm,
-                                                                     b0));
+                                        vlib_buffer_length_in_chain (vm, b0),
+                                        thread_index);
          /* Per-user LRU list maintenance */
          nat44_session_update_lru (sm, s0, thread_index);
 
index 0d74cb6..fce7efe 100644 (file)
@@ -24,6 +24,45 @@ from syslog_rfc5424_parser.constants import SyslogFacility, SyslogSeverity
 from vpp_papi_provider import SYSLOG_SEVERITY
 from io import BytesIO
 from vpp_papi import VppEnum
+from scapy.all import bind_layers, Packet, ByteEnumField, ShortField, \
+    IPField, IntField, LongField, XByteField, FlagsField, FieldLenField, \
+    PacketListField
+
+
+# NAT HA protocol event data
+class Event(Packet):
+    name = "Event"
+    fields_desc = [ByteEnumField("event_type", None,
+                                 {1: "add", 2: "del", 3: "refresh"}),
+                   ByteEnumField("protocol", None,
+                                 {0: "udp", 1: "tcp", 2: "icmp"}),
+                   ShortField("flags", 0),
+                   IPField("in_addr", None),
+                   IPField("out_addr", None),
+                   ShortField("in_port", None),
+                   ShortField("out_port", None),
+                   IPField("eh_addr", None),
+                   IPField("ehn_addr", None),
+                   ShortField("eh_port", None),
+                   ShortField("ehn_port", None),
+                   IntField("fib_index", None),
+                   IntField("total_pkts", 0),
+                   LongField("total_bytes", 0)]
+
+    def extract_padding(self, s):
+        return "", s
+
+
+# NAT HA protocol header
+class HANATStateSync(Packet):
+    name = "HA NAT state sync"
+    fields_desc = [XByteField("version", 1),
+                   FlagsField("flags", 0, 8, ['ACK']),
+                   FieldLenField("count", None, count_of="events"),
+                   IntField("sequence_number", 1),
+                   IntField("thread_index", 0),
+                   PacketListField("events", [], Event,
+                                   count_from=lambda pkt:pkt.count)]
 
 
 class MethodHolder(VppTestCase):
@@ -75,6 +114,9 @@ class MethodHolder(VppTestCase):
 
         self.vapi.syslog_set_filter(SYSLOG_SEVERITY.EMERG)
 
+        self.vapi.nat_ha_set_listener('0.0.0.0', 0)
+        self.vapi.nat_ha_set_failover('0.0.0.0', 0)
+
         interfaces = self.vapi.nat44_interface_dump()
         for intf in interfaces:
             if intf.is_inside > 1:
@@ -1460,6 +1502,7 @@ class TestNAT44(MethodHolder):
             cls.ipfix_src_port = 4739
             cls.ipfix_domain_id = 1
             cls.tcp_external_port = 80
+            cls.udp_external_port = 69
 
             cls.create_pg_interfaces(range(10))
             cls.interfaces = list(cls.pg_interfaces[0:4])
@@ -2247,8 +2290,8 @@ class TestNAT44(MethodHolder):
             self.assertTrue(session.is_static)
             self.assertEqual(session.inside_ip_address[0:4],
                              self.pg6.remote_ip4n)
-            self.assertEqual(map(ord, session.outside_ip_address[0:4]),
-                             map(int, static_nat_ip.split('.')))
+            self.assertEqual(session.outside_ip_address,
+                             socket.inet_pton(socket.AF_INET, static_nat_ip))
             self.assertTrue(session.inside_port in
                             [self.tcp_port_in, self.udp_port_in,
                              self.icmp_id_in])
@@ -3803,6 +3846,311 @@ class TestNAT44(MethodHolder):
         # Negotiated MSS value smaller than configured - unchanged
         self.verify_mss_value(capture[0], 1400)
 
+    @unittest.skipUnless(running_extended_tests, "part of extended tests")
+    def test_ha_send(self):
+        """ Send HA session synchronization events (active) """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+        self.vapi.nat_ha_set_listener(self.pg3.local_ip4, port=12345)
+        self.vapi.nat_ha_set_failover(self.pg3.remote_ip4, port=12346)
+        bind_layers(UDP, HANATStateSync, sport=12345)
+
+        # create sessions
+        pkts = self.create_stream_in(self.pg0, self.pg1)
+        self.pg0.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        capture = self.pg1.get_capture(len(pkts))
+        self.verify_capture_out(capture)
+        # active node sends HA events
+        self.vapi.nat_ha_flush()
+        stats = self.statistics.get_counter('/nat44/ha/add-event-send')
+        self.assertEqual(stats[0][0], 3)
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        self.assert_packet_checksums_valid(p)
+        try:
+            ip = p[IP]
+            udp = p[UDP]
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(ip.src, self.pg3.local_ip4)
+            self.assertEqual(ip.dst, self.pg3.remote_ip4)
+            self.assertEqual(udp.sport, 12345)
+            self.assertEqual(udp.dport, 12346)
+            self.assertEqual(hanat.version, 1)
+            self.assertEqual(hanat.thread_index, 0)
+            self.assertEqual(hanat.count, 3)
+            seq = hanat.sequence_number
+            for event in hanat.events:
+                self.assertEqual(event.event_type, 1)
+                self.assertEqual(event.in_addr, self.pg0.remote_ip4)
+                self.assertEqual(event.out_addr, self.nat_addr)
+                self.assertEqual(event.fib_index, 0)
+
+        # ACK received events
+        ack = (Ether(dst=self.pg3.local_mac, src=self.pg3.remote_mac) /
+               IP(src=self.pg3.remote_ip4, dst=self.pg3.local_ip4) /
+               UDP(sport=12346, dport=12345) /
+               HANATStateSync(sequence_number=seq, flags='ACK'))
+        self.pg3.add_stream(ack)
+        self.pg_start()
+        stats = self.statistics.get_counter('/nat44/ha/ack-recv')
+        self.assertEqual(stats[0][0], 1)
+
+        # delete one session
+        self.pg_enable_capture(self.pg_interfaces)
+        self.vapi.nat44_del_session(self.pg0.remote_ip4n, self.tcp_port_in,
+                                    IP_PROTOS.tcp)
+        self.vapi.nat_ha_flush()
+        stats = self.statistics.get_counter('/nat44/ha/del-event-send')
+        self.assertEqual(stats[0][0], 1)
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        try:
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertGreater(hanat.sequence_number, seq)
+
+        # do not send ACK; the active node retries sending the HA event
+        self.pg_enable_capture(self.pg_interfaces)
+        sleep(12)
+        stats = self.statistics.get_counter('/nat44/ha/retry-count')
+        self.assertEqual(stats[0][0], 3)
+        stats = self.statistics.get_counter('/nat44/ha/missed-count')
+        self.assertEqual(stats[0][0], 1)
+        capture = self.pg3.get_capture(3)
+        for packet in capture:
+            self.assertEqual(packet, p)
+
+        # session counters refresh
+        pkts = self.create_stream_out(self.pg1)
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        self.pg0.get_capture(2)
+        self.vapi.nat_ha_flush()
+        stats = self.statistics.get_counter('/nat44/ha/refresh-event-send')
+        self.assertEqual(stats[0][0], 2)
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        self.assert_packet_checksums_valid(p)
+        try:
+            ip = p[IP]
+            udp = p[UDP]
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(ip.src, self.pg3.local_ip4)
+            self.assertEqual(ip.dst, self.pg3.remote_ip4)
+            self.assertEqual(udp.sport, 12345)
+            self.assertEqual(udp.dport, 12346)
+            self.assertEqual(hanat.version, 1)
+            self.assertEqual(hanat.count, 2)
+            seq = hanat.sequence_number
+            for event in hanat.events:
+                self.assertEqual(event.event_type, 3)
+                self.assertEqual(event.out_addr, self.nat_addr)
+                self.assertEqual(event.fib_index, 0)
+                self.assertEqual(event.total_pkts, 2)
+                self.assertGreater(event.total_bytes, 0)
+
+        ack = (Ether(dst=self.pg3.local_mac, src=self.pg3.remote_mac) /
+               IP(src=self.pg3.remote_ip4, dst=self.pg3.local_ip4) /
+               UDP(sport=12346, dport=12345) /
+               HANATStateSync(sequence_number=seq, flags='ACK'))
+        self.pg3.add_stream(ack)
+        self.pg_start()
+        stats = self.statistics.get_counter('/nat44/ha/ack-recv')
+        self.assertEqual(stats[0][0], 2)
+
+    def test_ha_recv(self):
+        """ Receive HA session synchronization events (passive) """
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+        self.vapi.nat_ha_set_listener(self.pg3.local_ip4, port=12345)
+        bind_layers(UDP, HANATStateSync, sport=12345)
+
+        self.tcp_port_out = random.randint(1025, 65535)
+        self.udp_port_out = random.randint(1025, 65535)
+
+        # send HA session add events to failover/passive
+        p = (Ether(dst=self.pg3.local_mac, src=self.pg3.remote_mac) /
+             IP(src=self.pg3.remote_ip4, dst=self.pg3.local_ip4) /
+             UDP(sport=12346, dport=12345) /
+             HANATStateSync(sequence_number=1, events=[
+                 Event(event_type='add', protocol='tcp',
+                       in_addr=self.pg0.remote_ip4, out_addr=self.nat_addr,
+                       in_port=self.tcp_port_in, out_port=self.tcp_port_out,
+                       eh_addr=self.pg1.remote_ip4,
+                       ehn_addr=self.pg1.remote_ip4,
+                       eh_port=self.tcp_external_port,
+                       ehn_port=self.tcp_external_port, fib_index=0),
+                 Event(event_type='add', protocol='udp',
+                       in_addr=self.pg0.remote_ip4, out_addr=self.nat_addr,
+                       in_port=self.udp_port_in, out_port=self.udp_port_out,
+                       eh_addr=self.pg1.remote_ip4,
+                       ehn_addr=self.pg1.remote_ip4,
+                       eh_port=self.udp_external_port,
+                       ehn_port=self.udp_external_port, fib_index=0)]))
+
+        self.pg3.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # receive ACK
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        try:
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(hanat.sequence_number, 1)
+            self.assertEqual(hanat.flags, 'ACK')
+            self.assertEqual(hanat.version, 1)
+            self.assertEqual(hanat.thread_index, 0)
+        stats = self.statistics.get_counter('/nat44/ha/ack-send')
+        self.assertEqual(stats[0][0], 1)
+        stats = self.statistics.get_counter('/nat44/ha/add-event-recv')
+        self.assertEqual(stats[0][0], 2)
+        users = self.statistics.get_counter('/nat44/total-users')
+        self.assertEqual(users[0][0], 1)
+        sessions = self.statistics.get_counter('/nat44/total-sessions')
+        self.assertEqual(sessions[0][0], 2)
+        users = self.vapi.nat44_user_dump()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[0].ip_address, self.pg0.remote_ip4n)
+        # there should be 2 sessions created by HA
+        sessions = self.vapi.nat44_user_session_dump(users[0].ip_address,
+                                                     users[0].vrf_id)
+        self.assertEqual(len(sessions), 2)
+        for session in sessions:
+            self.assertEqual(session.inside_ip_address, self.pg0.remote_ip4n)
+            self.assertEqual(session.outside_ip_address, self.nat_addr_n)
+            self.assertIn(session.inside_port,
+                          [self.tcp_port_in, self.udp_port_in])
+            self.assertIn(session.outside_port,
+                          [self.tcp_port_out, self.udp_port_out])
+            self.assertIn(session.protocol, [IP_PROTOS.tcp, IP_PROTOS.udp])
+
+        # send HA session delete event to failover/passive
+        p = (Ether(dst=self.pg3.local_mac, src=self.pg3.remote_mac) /
+             IP(src=self.pg3.remote_ip4, dst=self.pg3.local_ip4) /
+             UDP(sport=12346, dport=12345) /
+             HANATStateSync(sequence_number=2, events=[
+                 Event(event_type='del', protocol='udp',
+                       in_addr=self.pg0.remote_ip4, out_addr=self.nat_addr,
+                       in_port=self.udp_port_in, out_port=self.udp_port_out,
+                       eh_addr=self.pg1.remote_ip4,
+                       ehn_addr=self.pg1.remote_ip4,
+                       eh_port=self.udp_external_port,
+                       ehn_port=self.udp_external_port, fib_index=0)]))
+
+        self.pg3.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # receive ACK
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        try:
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(hanat.sequence_number, 2)
+            self.assertEqual(hanat.flags, 'ACK')
+            self.assertEqual(hanat.version, 1)
+        users = self.vapi.nat44_user_dump()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[0].ip_address, self.pg0.remote_ip4n)
+        # now we should have only 1 session, 1 deleted by HA
+        sessions = self.vapi.nat44_user_session_dump(users[0].ip_address,
+                                                     users[0].vrf_id)
+        self.assertEqual(len(sessions), 1)
+        stats = self.statistics.get_counter('/nat44/ha/del-event-recv')
+        self.assertEqual(stats[0][0], 1)
+
+        stats = self.statistics.get_counter('/err/nat-ha/pkts-processed')
+        self.assertEqual(stats, 2)
+
+        # send HA session refresh event to failover/passive
+        p = (Ether(dst=self.pg3.local_mac, src=self.pg3.remote_mac) /
+             IP(src=self.pg3.remote_ip4, dst=self.pg3.local_ip4) /
+             UDP(sport=12346, dport=12345) /
+             HANATStateSync(sequence_number=3, events=[
+                 Event(event_type='refresh', protocol='tcp',
+                       in_addr=self.pg0.remote_ip4, out_addr=self.nat_addr,
+                       in_port=self.tcp_port_in, out_port=self.tcp_port_out,
+                       eh_addr=self.pg1.remote_ip4,
+                       ehn_addr=self.pg1.remote_ip4,
+                       eh_port=self.tcp_external_port,
+                       ehn_port=self.tcp_external_port, fib_index=0,
+                       total_bytes=1024, total_pkts=2)]))
+        self.pg3.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        # receive ACK
+        capture = self.pg3.get_capture(1)
+        p = capture[0]
+        try:
+            hanat = p[HANATStateSync]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(hanat.sequence_number, 3)
+            self.assertEqual(hanat.flags, 'ACK')
+            self.assertEqual(hanat.version, 1)
+        users = self.vapi.nat44_user_dump()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[0].ip_address, self.pg0.remote_ip4n)
+        sessions = self.vapi.nat44_user_session_dump(users[0].ip_address,
+                                                     users[0].vrf_id)
+        self.assertEqual(len(sessions), 1)
+        session = sessions[0]
+        self.assertEqual(session.total_bytes, 1024)
+        self.assertEqual(session.total_pkts, 2)
+        stats = self.statistics.get_counter('/nat44/ha/refresh-event-recv')
+        self.assertEqual(stats[0][0], 1)
+
+        stats = self.statistics.get_counter('/err/nat-ha/pkts-processed')
+        self.assertEqual(stats, 3)
+
+        # send packet to test session created by HA
+        p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
+             IP(src=self.pg1.remote_ip4, dst=self.nat_addr) /
+             TCP(sport=self.tcp_external_port, dport=self.tcp_port_out))
+        self.pg1.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        capture = self.pg0.get_capture(1)
+        p = capture[0]
+        try:
+            ip = p[IP]
+            tcp = p[TCP]
+        except IndexError:
+            self.logger.error(ppp("Invalid packet:", p))
+            raise
+        else:
+            self.assertEqual(ip.src, self.pg1.remote_ip4)
+            self.assertEqual(ip.dst, self.pg0.remote_ip4)
+            self.assertEqual(tcp.sport, self.tcp_external_port)
+            self.assertEqual(tcp.dport, self.tcp_port_in)
+
     def tearDown(self):
         super(TestNAT44, self).tearDown()
         if not self.vpp_dead:
@@ -3816,6 +4164,7 @@ class TestNAT44(MethodHolder):
             self.logger.info(self.vapi.cli("show nat timeouts"))
             self.logger.info(
                 self.vapi.cli("show nat addr-port-assignment-alg"))
+            self.logger.info(self.vapi.cli("show nat ha"))
             self.clear_nat44()
             self.vapi.cli("clear logging")
 
index 249288b..f6069e3 100644 (file)
@@ -2201,6 +2201,50 @@ class VppPapiProvider(object):
         """
         return self.api(self.papi.nat66_static_mapping_dump, {})
 
+    def nat_ha_set_listener(self, addr, port, path_mtu=512):
+        """Set HA listener (local settings)
+
+        :param addr: local IP4 address
+        :param port: local UDP port number
+        :param path_mtu: path MTU (Default value = 512)
+        """
+        return self.api(self.papi.nat_ha_set_listener,
+                        {'ip_address': addr,
+                         'port': port,
+                         'path_mtu': path_mtu})
+
+    def nat_ha_get_listener(self):
+        """Get HA listener/local configuration"""
+        return self.api(self.papi.nat_ha_get_listener, {})
+
+    def nat_ha_set_failover(self, addr, port, refresh=10):
+        """Set HA failover (remote settings)
+
+        :param addr: failover IP4 address
+        :param port: failover UDP port number
+        :param refresh: number of seconds after which to send session refresh
+        """
+        return self.api(self.papi.nat_ha_set_failover,
+                        {'ip_address': addr,
+                         'port': port,
+                         'session_refresh_interval': refresh})
+
+    def nat_ha_get_failover(self):
+        """Get HA failover/remote settings reply"""
+        return self.api(self.papi.nat_ha_get_failover, {})
+
+    def nat_ha_flush(self):
+        """Flush the current HA data"""
+        return self.api(self.papi.nat_ha_flush, {})
+
+    def nat_ha_resync(self, want_resync_event=1):
+        """Resync HA (resend existing sessions to new failover)
+        :param want_resync_event: if non-zero, resync completed event is sent
+        """
+        return self.api(self.papi.nat_ha_resync,
+                        {'want_resync_event': want_resync_event,
+                         'pid': os.getpid()})
+
     def control_ping(self):
         self.api(self.papi.control_ping)