vcl: ldp support SO_ORIGINAL_DST 31/39131/6
author qinyang <qiny@yusur.tech>
Tue, 27 Jun 2023 08:11:53 +0000 (01:11 -0700)
committer Florin Coras <florin.coras@gmail.com>
Fri, 21 Jul 2023 16:27:14 +0000 (16:27 +0000)
Type: improvement

Support the SO_ORIGINAL_DST socket option so that an application can retrieve the original destination IPv4 address and port of an accepted connection when a NAT44 rule is enabled.

Change-Id: If00e00d03e48f3b78a23a68f1b078954d79dd0f7
Signed-off-by: qinyang <qiny@yusur.tech>
14 files changed:
src/plugins/nat/nat44-ed/nat44_ed.c
src/vcl/ldp.c
src/vcl/vcl_bapi.c
src/vcl/vcl_cfg.c
src/vcl/vcl_private.h
src/vcl/vcl_sapi.c
src/vcl/vppcom.c
src/vcl/vppcom.h
src/vnet/session/application.c
src/vnet/session/application.h
src/vnet/session/application_interface.h
src/vnet/session/session.c
src/vnet/session/session.h
src/vnet/session/session_api.c

index 2ccd461..74359cc 100644 (file)
@@ -4104,7 +4104,49 @@ nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
                         idaddr, idport, xdaddr, xdport, proto, 0,
                         is_twicenat);
 }
+__clib_export void
+nat44_original_dst_lookup (ip4_address_t *i2o_src, u16 i2o_src_port,
+                          ip4_address_t *i2o_dst, u16 i2o_dst_port,
+                          ip_protocol_t proto, u32 *original_dst,
+                          u16 *original_dst_port)
+{
+  snat_main_per_thread_data_t *tsm;
+  snat_main_t *sm = &snat_main;
+  u32 fib_index = 0;
+  snat_session_t *s;
+  ip4_header_t ip;
+
+  ip.src_address.as_u32 = i2o_src->as_u32;
+  fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0);
 
+  if (sm->num_workers > 1)
+    {
+      tsm = vec_elt_at_index (
+       sm->per_thread_data,
+       nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
+    }
+  else
+    {
+      tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+    }
+
+  /* query */
+  clib_bihash_kv_16_8_t kv = { 0 }, value;
+  init_ed_k (&kv, i2o_src->as_u32, i2o_src_port, i2o_dst->as_u32, i2o_dst_port,
+            fib_index, proto);
+  if (tsm->sessions == NULL ||
+      clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
+    {
+      return;
+    }
+  s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
+  if (s)
+    {
+      *original_dst = s->i2o.rewrite.saddr.as_u32;
+      *original_dst_port = s->i2o.rewrite.sport;
+    }
+  return;
+}
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 2256a2b..a8062b4 100644 (file)
 #define UDP_SEGMENT 103
 #endif
 
+#ifndef SO_ORIGINAL_DST
+/* from <linux/netfilter_ipv4.h> */
+#define SO_ORIGINAL_DST 80
+#endif
 typedef struct ldp_worker_ctx_
 {
   u8 *io_buffer;
@@ -2043,6 +2047,21 @@ getsockopt (int fd, int level, int optname,
              break;
            }
          break;
+       case SOL_IP:
+         switch (optname)
+           {
+           case SO_ORIGINAL_DST:
+             rv =
+               vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
+             break;
+           default:
+             LDBG (0,
+                   "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
+                   "optname %d unsupported!",
+                   fd, vlsh, optname);
+             break;
+           }
+         break;
        case SOL_IPV6:
          switch (optname)
            {
index afe8824..6071f64 100644 (file)
@@ -360,7 +360,8 @@ vcl_bapi_send_attach (void)
     (vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
     (app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
     (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
-    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0);
+    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+    (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
   bmp->options[APP_OPTIONS_PROXY_TRANSPORT] =
     (u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
           (vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
index be142ea..edea60d 100644 (file)
@@ -464,6 +464,11 @@ vppcom_cfg_read_file (char *conf_fname)
              VCFG_DBG (0, "VCL<%d>: configured with multithread workers",
                        getpid ());
            }
+         else if (unformat (line_input, "app_original_dst"))
+           {
+             vcl_cfg->app_original_dst = 1;
+             VCFG_DBG (0, "VCL<%d>: support original destination", getpid ());
+           }
          else if (unformat (line_input, "}"))
            {
              vc_cfg_input = 0;
index 39a0f05..8345e34 100644 (file)
@@ -180,6 +180,9 @@ typedef struct vcl_session_
 #if VCL_ELOG
   elog_track_t elog_track;
 #endif
+
+  u16 original_dst_port; /**< original dst port (network order) */
+  u32 original_dst_ip4;         /**< original dst ip4 (network order) */
 } vcl_session_t;
 
 typedef struct vppcom_cfg_t_
@@ -208,6 +211,7 @@ typedef struct vppcom_cfg_t_
   u32 tls_engine;
   u8 mt_wrk_supported;
   u8 huge_page;
+  u8 app_original_dst;
 } vppcom_cfg_t;
 
 void vppcom_cfg (vppcom_cfg_t * vcl_cfg);
index 3a97fa2..e3e2b6a 100644 (file)
@@ -130,7 +130,8 @@ vcl_api_send_attach (clib_socket_t * cs)
     (vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
     (app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
     (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
-    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0);
+    (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+    (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
   mp->options[APP_OPTIONS_PROXY_TRANSPORT] =
     (u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
           (vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
index d9cc885..06a345d 100644 (file)
@@ -351,6 +351,11 @@ vcl_session_accepted_handler (vcl_worker_t * wrk, session_accepted_msg_t * mp,
 
   session->vpp_handle = mp->handle;
   session->session_state = VCL_STATE_READY;
+  if (mp->rmt.is_ip4)
+    {
+      session->original_dst_ip4 = mp->original_dst_ip4;
+      session->original_dst_port = mp->original_dst_port;
+    }
   session->transport.rmt_port = mp->rmt.port;
   session->transport.is_ip4 = mp->rmt.is_ip4;
   clib_memcpy_fast (&session->transport.rmt_ip, &mp->rmt.ip,
@@ -3611,6 +3616,33 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
        rv = VPPCOM_EINVAL;
       break;
 
+    case VPPCOM_ATTR_GET_ORIGINAL_DST:
+      if (!session->transport.is_ip4)
+       {
+         /* original dst is currently supported for ipv4 only */
+         rv = VPPCOM_EAFNOSUPPORT;
+         break;
+       }
+      if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*ep)) &&
+                       ep->ip))
+       {
+         ep->is_ip4 = session->transport.is_ip4;
+         ep->port = session->original_dst_port;
+         clib_memcpy_fast (ep->ip, &session->original_dst_ip4,
+                           sizeof (ip4_address_t));
+         *buflen = sizeof (*ep);
+         VDBG (1,
+               "VPPCOM_ATTR_GET_ORIGINAL_DST: sh %u, is_ip4 = %u, addr = %U"
+               " port %d",
+               session_handle, ep->is_ip4, vcl_format_ip4_address,
+               (ip4_address_t *) (&session->original_dst_ip4),
+               ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
+               clib_net_to_host_u16 (ep->port));
+       }
+      else
+       rv = VPPCOM_EINVAL;
+      break;
+
     case VPPCOM_ATTR_SET_LCL_ADDR:
       if (PREDICT_TRUE (buffer && buflen &&
                        (*buflen >= sizeof (*ep)) && ep->ip))
index 71a49ab..7826076 100644 (file)
@@ -176,6 +176,7 @@ typedef enum
   VPPCOM_ATTR_SET_DSCP,
   VPPCOM_ATTR_SET_IP_PKTINFO,
   VPPCOM_ATTR_GET_IP_PKTINFO,
+  VPPCOM_ATTR_GET_ORIGINAL_DST,
 } vppcom_attr_op_t;
 
 typedef struct _vcl_poll
index cfa9719..5998921 100644 (file)
@@ -1529,6 +1529,12 @@ application_has_global_scope (application_t * app)
   return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
 }
 
+int
+application_original_dst_is_enabled (application_t *app)
+{
+  return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
 static clib_error_t *
 application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
                                        u8 transport_proto, u8 is_start)
index 09737a6..e100fe8 100644 (file)
@@ -300,6 +300,7 @@ u8 application_has_global_scope (application_t * app);
 void application_setup_proxy (application_t * app);
 void application_remove_proxy (application_t * app);
 void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
 
 segment_manager_props_t *application_get_segment_manager_properties (u32
                                                                     app_index);
index 138953b..510068b 100644 (file)
@@ -233,7 +233,8 @@ typedef enum
   _ (USE_LOCAL_SCOPE, "App can use local session scope")                      \
   _ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling")                        \
   _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs")                     \
-  _ (USE_HUGE_PAGE, "Use huge page for FIFO")
+  _ (USE_HUGE_PAGE, "Use huge page for FIFO")                                 \
+  _ (GET_ORIGINAL_DST, "Get original dst enabled")
 
 typedef enum _app_options
 {
@@ -299,15 +300,15 @@ typedef struct app_session_transport_
   u8 is_ip4;                   /**< set if uses ip4 networking */
 } app_session_transport_t;
 
-#define foreach_app_session_field                                      \
-  _(svm_fifo_t, *rx_fifo)              /**< rx fifo */                 \
-  _(svm_fifo_t, *tx_fifo)              /**< tx fifo */                 \
-  _(session_type_t, session_type)      /**< session type */            \
-  _(volatile u8, session_state)                /**< session state */           \
-  _(u32, session_index)                        /**< index in owning pool */    \
-  _(app_session_transport_t, transport)        /**< transport info */          \
-  _(svm_msg_q_t, *vpp_evt_q)           /**< vpp event queue  */        \
-  _(u8, is_dgram)                      /**< flag for dgram mode */     \
+#define foreach_app_session_field                                             \
+  _ (svm_fifo_t, *rx_fifo)              /**< rx fifo */                      \
+  _ (svm_fifo_t, *tx_fifo)              /**< tx fifo */                      \
+  _ (session_type_t, session_type)      /**< session type */                 \
+  _ (volatile u8, session_state)        /**< session state */                \
+  _ (u32, session_index)                /**< index in owning pool */         \
+  _ (app_session_transport_t, transport) /**< transport info */               \
+  _ (svm_msg_q_t, *vpp_evt_q)           /**< vpp event queue  */             \
+  _ (u8, is_dgram)                      /**< flag for dgram mode */
 
 typedef struct
 {
@@ -386,6 +387,8 @@ typedef struct session_accepted_msg_
   transport_endpoint_t lcl;
   transport_endpoint_t rmt;
   u8 flags;
+  u32 original_dst_ip4;
+  u16 original_dst_port;
 } __clib_packed session_accepted_msg_t;
 
 typedef struct session_accepted_reply_msg_
index 5bb5776..228234c 100644 (file)
@@ -17,6 +17,7 @@
  * @brief Session and session manager
  */
 
+#include <vnet/plugin/plugin.h>
 #include <vnet/session/session.h>
 #include <vnet/session/application.h>
 #include <vnet/dpo/load_balance.h>
@@ -1762,6 +1763,22 @@ session_segment_handle (session_t * s)
                                              f->segment_index);
 }
 
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+                         transport_endpoint_t *i2o_dst,
+                         transport_proto_t transport_proto, u32 *original_dst,
+                         u16 *original_dst_port)
+{
+  session_main_t *smm = vnet_get_session_main ();
+  ip_protocol_t proto =
+    (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+  if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+    return;
+  smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+                           i2o_dst->port, proto, original_dst,
+                           original_dst_port);
+}
+
 /* *INDENT-OFF* */
 static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
     session_tx_fifo_peek_and_snd,
@@ -2292,6 +2309,11 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
        smm->no_adaptive = 1;
       else if (unformat (input, "use-dma"))
        smm->dma_enabled = 1;
+      else if (unformat (input, "nat44-original-dst-enable"))
+       {
+         smm->original_dst_lookup = vlib_get_plugin_symbol (
+           "nat_plugin.so", "nat44_original_dst_lookup");
+       }
       /*
        * Deprecated but maintained for compatibility
        */
index 10bae27..9c08f1a 100644 (file)
@@ -193,6 +193,10 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;
 u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
 
 typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+  ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+  u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+  u16 *original_dst_port);
 
 typedef struct session_main_
 {
@@ -281,6 +285,9 @@ typedef struct session_main_
   u32 preallocated_sessions;
 
   u16 msg_id_base;
+
+  /** Query nat44-ed session to get original dst ip4 & dst port. */
+  nat44_original_dst_lookup_fn original_dst_lookup;
 } session_main_t;
 
 extern session_main_t session_main;
@@ -812,6 +819,10 @@ void session_wrk_handle_evts_main_rpc (void *);
 
 session_t *session_alloc_for_connection (transport_connection_t * tc);
 session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+                              transport_endpoint_t *i2o_dst,
+                              transport_proto_t transport_proto,
+                              u32 *original_dst, u16 *original_dst_port);
 
 typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
 
index 3e99938..3d70733 100644 (file)
@@ -136,6 +136,13 @@ mq_send_session_accepted_cb (session_t * s)
       m.mq_index = s->thread_index;
     }
 
+  if (application_original_dst_is_enabled (app))
+    {
+      session_get_original_dst (&m.lcl, &m.rmt,
+                               session_get_transport_proto (s),
+                               &m.original_dst_ip4, &m.original_dst_port);
+    }
+
   app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
 
   return 0;