ipsec: fast path outbound policy matching implementation for ipv6
[vpp.git] / src / vnet / ipsec / ipsec_output.c
index df93b5e..028d976 100644 (file)
 #include <vnet/ip/ip.h>
 
 #include <vnet/ipsec/ipsec.h>
-
-#if DPDK_CRYPTO==1
-#define ESP_NODE "dpdk-esp-encrypt"
-#else
-#define ESP_NODE "esp-encrypt"
-#endif
-
-#if WITH_LIBSSL > 0
-
-#define foreach_ipsec_output_next                \
-_(DROP, "error-drop")                            \
-_(ESP_ENCRYPT, ESP_NODE)
-
-#define _(v, s) IPSEC_OUTPUT_NEXT_##v,
-typedef enum
-{
-  foreach_ipsec_output_next
-#undef _
-    IPSEC_OUTPUT_N_NEXT,
-} ipsec_output_next_t;
-
+#include <vnet/ipsec/ipsec_io.h>
+#include <vnet/ipsec/ipsec_output.h>
 
 #define foreach_ipsec_output_error                   \
  _(RX_PKTS, "IPSec pkts received")                   \
@@ -50,7 +31,6 @@ typedef enum
  _(POLICY_BYPASS, "IPSec policy bypass")             \
  _(ENCAPS_FAILED, "IPSec encapsulation failed")
 
-
 typedef enum
 {
 #define _(sym,str) IPSEC_OUTPUT_ERROR_##sym,
@@ -65,12 +45,10 @@ static char *ipsec_output_error_strings[] = {
 #undef _
 };
 
-static vlib_node_registration_t ipsec_output_ip4_node;
-static vlib_node_registration_t ipsec_output_ip6_node;
-
 typedef struct
 {
   u32 spd_id;
+  u32 policy_id;
 } ipsec_output_trace_t;
 
 /* packet trace format function */
@@ -81,117 +59,9 @@ format_ipsec_output_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ipsec_output_trace_t *t = va_arg (*args, ipsec_output_trace_t *);
 
-  if (t->spd_id != ~0)
-    {
-      s = format (s, "spd %u ", t->spd_id);
-    }
-  else
-    {
-      s = format (s, "no spd");
-    }
-  return s;
-}
-
-always_inline ipsec_policy_t *
-ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
-                          u16 rp)
-{
-  ipsec_policy_t *p;
-  u32 *i;
-
-  if (!spd)
-    return 0;
-
-  vec_foreach (i, spd->ipv4_outbound_policies)
-  {
-    p = pool_elt_at_index (spd->policies, *i);
-    if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
-      continue;
-
-    if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
-      continue;
-
-    if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
-      continue;
-
-    if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
-      continue;
-
-    if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
-      continue;
-
-    if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
-      return p;
+  s = format (s, "spd %u policy %d", t->spd_id, t->policy_id);
 
-    if (lp < p->lport.start)
-      continue;
-
-    if (lp > p->lport.stop)
-      continue;
-
-    if (rp < p->rport.start)
-      continue;
-
-    if (rp > p->rport.stop)
-      continue;
-
-    return p;
-  }
-  return 0;
-}
-
-always_inline uword
-ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
-                     ip6_address_t * ua)
-{
-  if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
-      (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
-    return 1;
-  return 0;
-}
-
-always_inline ipsec_policy_t *
-ipsec_output_ip6_policy_match (ipsec_spd_t * spd,
-                              ip6_address_t * la,
-                              ip6_address_t * ra, u16 lp, u16 rp, u8 pr)
-{
-  ipsec_policy_t *p;
-  u32 *i;
-
-  if (!spd)
-    return 0;
-
-  vec_foreach (i, spd->ipv6_outbound_policies)
-  {
-    p = pool_elt_at_index (spd->policies, *i);
-    if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
-      continue;
-
-    if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
-      continue;
-
-    if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
-      continue;
-
-    if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
-      return p;
-
-    if (lp < p->lport.start)
-      continue;
-
-    if (lp > p->lport.stop)
-      continue;
-
-    if (rp < p->rport.start)
-      continue;
-
-    if (rp > p->rport.stop)
-      continue;
-
-    return p;
-  }
-
-  return 0;
+  return s;
 }
 
 static inline uword
@@ -200,29 +70,41 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   ipsec_main_t *im = &ipsec_main;
 
-  u32 *from, *to_next = 0;
+  u32 *from, *to_next = 0, thread_index;
   u32 n_left_from, sw_if_index0, last_sw_if_index = (u32) ~ 0;
   u32 next_node_index = (u32) ~ 0, last_next_node_index = (u32) ~ 0;
   vlib_frame_t *f = 0;
   u32 spd_index0 = ~0;
   ipsec_spd_t *spd0 = 0;
+  int bogus;
   u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0;
+  u8 flow_cache_enabled = im->output_flow_cache_flag;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
+  thread_index = vm->thread_index;
 
   while (n_left_from > 0)
     {
-      u32 bi0;
-      vlib_buffer_t *b0;
-      ipsec_policy_t *p0;
+      u32 bi0, pi0, bi1;
+      vlib_buffer_t *b0, *b1;
+      ipsec_policy_t *p0 = NULL;
       ip4_header_t *ip0;
       ip6_header_t *ip6_0 = 0;
       udp_header_t *udp0;
       u32 iph_offset = 0;
+      tcp_header_t *tcp0;
+      u64 bytes0;
 
       bi0 = from[0];
       b0 = vlib_get_buffer (vm, bi0);
+      if (n_left_from > 1)
+       {
+         bi1 = from[1];
+         b1 = vlib_get_buffer (vm, bi1);
+         CLIB_PREFETCH (b1, CLIB_CACHE_LINE_BYTES * 2, STORE);
+         vlib_prefetch_buffer_data (b1, LOAD);
+       }
       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
       iph_offset = vnet_buffer (b0)->ip.save_rewrite_length;
       ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0)
@@ -232,7 +114,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (PREDICT_FALSE (last_sw_if_index != sw_if_index0))
        {
          uword *p = hash_get (im->spd_index_by_sw_if_index, sw_if_index0);
-         ASSERT (p);
+         ALWAYS_ASSERT (p);
          spd_index0 = p[0];
          spd0 = pool_elt_at_index (im->spds, spd_index0);
          last_sw_if_index = sw_if_index0;
@@ -253,14 +135,13 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
             spd0->id);
 #endif
 
-         p0 = ipsec_output_ip6_policy_match (spd0,
-                                             &ip6_0->src_address,
-                                             &ip6_0->dst_address,
-                                             clib_net_to_host_u16
-                                             (udp0->src_port),
-                                             clib_net_to_host_u16
-                                             (udp0->dst_port),
-                                             ip6_0->protocol);
+         p0 = ipsec6_output_policy_match (spd0,
+                                          &ip6_0->src_address,
+                                          &ip6_0->dst_address,
+                                          clib_net_to_host_u16
+                                          (udp0->src_port),
+                                          clib_net_to_host_u16
+                                          (udp0->dst_port), ip6_0->protocol);
        }
       else
        {
@@ -275,72 +156,126 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                        sw_if_index0, spd_index0, spd0->id);
 #endif
 
-         p0 = ipsec_output_policy_match (spd0, ip0->protocol,
-                                         clib_net_to_host_u32
-                                         (ip0->src_address.as_u32),
-                                         clib_net_to_host_u32
-                                         (ip0->dst_address.as_u32),
-                                         clib_net_to_host_u16
-                                         (udp0->src_port),
-                                         clib_net_to_host_u16
-                                         (udp0->dst_port));
+         /*
+          * Check whether flow cache is enabled.
+          */
+         if (flow_cache_enabled)
+           {
+             p0 = ipsec4_out_spd_find_flow_cache_entry (
+               im, ip0->protocol, ip0->src_address.as_u32,
+               ip0->dst_address.as_u32, udp0->src_port, udp0->dst_port);
+           }
+
+         /* Fall back to linear search if flow cache lookup fails */
+         if (p0 == NULL)
+           {
+             p0 = ipsec_output_policy_match (
+               spd0, ip0->protocol,
+               clib_net_to_host_u32 (ip0->src_address.as_u32),
+               clib_net_to_host_u32 (ip0->dst_address.as_u32),
+               clib_net_to_host_u16 (udp0->src_port),
+               clib_net_to_host_u16 (udp0->dst_port), flow_cache_enabled);
+           }
        }
+      tcp0 = (void *) udp0;
 
       if (PREDICT_TRUE (p0 != NULL))
        {
+         pi0 = p0 - im->policies;
+
+         vlib_prefetch_combined_counter (&ipsec_spd_policy_counters,
+                                         thread_index, pi0);
+
+         if (is_ipv6)
+           {
+             bytes0 = clib_net_to_host_u16 (ip6_0->payload_length);
+             bytes0 += sizeof (ip6_header_t);
+           }
+         else
+           {
+             bytes0 = clib_net_to_host_u16 (ip0->length);
+           }
+
          if (p0->policy == IPSEC_POLICY_ACTION_PROTECT)
            {
+             ipsec_sa_t *sa = 0;
              nc_protect++;
-             next_node_index = im->esp_encrypt_node_index;
-             vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
-             vlib_buffer_advance (b0, iph_offset);
-             p0->counter.packets++;
-             if (is_ipv6)
-               {
-                 p0->counter.bytes +=
-                   clib_net_to_host_u16 (ip6_0->payload_length);
-                 p0->counter.bytes += sizeof (ip6_header_t);
-               }
+             sa = ipsec_sa_get (p0->sa_index);
+             if (sa->protocol == IPSEC_PROTOCOL_ESP)
+               if (is_ipv6)
+                 next_node_index = im->esp6_encrypt_node_index;
+               else
+                 next_node_index = im->esp4_encrypt_node_index;
+             else if (is_ipv6)
+               next_node_index = im->ah6_encrypt_node_index;
              else
+               next_node_index = im->ah4_encrypt_node_index;
+             vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+
+             if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_OFFLOAD))
                {
-                 p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
+                 vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
+
+                 /*
+                  * Clear the offload flags before the checksums are
+                  * computed below; doing so guarantees a cache hit.
+                  */
+                 vnet_buffer_offload_flags_clear (b0, oflags);
+
+                 if (is_ipv6)
+                   {
+                     if (PREDICT_FALSE (oflags &
+                                        VNET_BUFFER_OFFLOAD_F_TCP_CKSUM))
+                       {
+                         tcp0->checksum = ip6_tcp_udp_icmp_compute_checksum (
+                           vm, b0, ip6_0, &bogus);
+                       }
+                     if (PREDICT_FALSE (oflags &
+                                        VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
+                       {
+                         udp0->checksum = ip6_tcp_udp_icmp_compute_checksum (
+                           vm, b0, ip6_0, &bogus);
+                       }
+                   }
+                 else
+                   {
+                     if (PREDICT_FALSE (oflags &
+                                        VNET_BUFFER_OFFLOAD_F_IP_CKSUM))
+                       {
+                         ip0->checksum = ip4_header_checksum (ip0);
+                       }
+                     if (PREDICT_FALSE (oflags &
+                                        VNET_BUFFER_OFFLOAD_F_TCP_CKSUM))
+                       {
+                         tcp0->checksum =
+                           ip4_tcp_udp_compute_checksum (vm, b0, ip0);
+                       }
+                     if (PREDICT_FALSE (oflags &
+                                        VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
+                       {
+                         udp0->checksum =
+                           ip4_tcp_udp_compute_checksum (vm, b0, ip0);
+                       }
+                   }
                }
+             vlib_buffer_advance (b0, iph_offset);
            }
          else if (p0->policy == IPSEC_POLICY_ACTION_BYPASS)
            {
              nc_bypass++;
              next_node_index = get_next_output_feature_node_index (b0, node);
-             p0->counter.packets++;
-             if (is_ipv6)
-               {
-                 p0->counter.bytes +=
-                   clib_net_to_host_u16 (ip6_0->payload_length);
-                 p0->counter.bytes += sizeof (ip6_header_t);
-               }
-             else
-               {
-                 p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
-               }
            }
          else
            {
              nc_discard++;
-             p0->counter.packets++;
-             if (is_ipv6)
-               {
-                 p0->counter.bytes +=
-                   clib_net_to_host_u16 (ip6_0->payload_length);
-                 p0->counter.bytes += sizeof (ip6_header_t);
-               }
-             else
-               {
-                 p0->counter.bytes += clib_net_to_host_u16 (ip0->length);
-               }
              next_node_index = im->error_drop_node_index;
            }
+         vlib_increment_combined_counter
+           (&ipsec_spd_policy_counters, thread_index, pi0, 1, bytes0);
        }
       else
        {
+         pi0 = ~0;
          nc_nomatch++;
          next_node_index = im->error_drop_node_index;
        }
@@ -357,6 +292,11 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          last_next_node_index = next_node_index;
 
          f = vlib_get_frame_to_node (vm, next_node_index);
+
+         /* Copy the trace flag from this node's runtime flags */
+         /* into the frame_flags of the newly obtained frame. */
+         f->frame_flags |= node->flags & VLIB_NODE_FLAG_TRACE;
+
          to_next = vlib_frame_vector_args (f);
        }
 
@@ -364,12 +304,14 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       to_next += 1;
       f->n_vectors++;
 
-      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+         PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        {
          ipsec_output_trace_t *tr =
            vlib_add_trace (vm, node, b0, sizeof (*tr));
          if (spd0)
            tr->spd_id = spd0->id;
+         tr->policy_id = pi0;
        }
     }
 
@@ -386,17 +328,16 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   return from_frame->n_vectors;
 }
 
-static uword
-ipsec_output_ip4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
-                         vlib_frame_t * frame)
+VLIB_NODE_FN (ipsec4_output_node) (vlib_main_t * vm,
+                                  vlib_node_runtime_t * node,
+                                  vlib_frame_t * frame)
 {
   return ipsec_output_inline (vm, node, frame, 0);
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ipsec_output_ip4_node,static) = {
-  .function = ipsec_output_ip4_node_fn,
-  .name = "ipsec-output-ip4",
+VLIB_REGISTER_NODE (ipsec4_output_node) = {
+  .name = "ipsec4-output-feature",
   .vector_size = sizeof (u32),
   .format_trace = format_ipsec_output_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
@@ -413,18 +354,15 @@ VLIB_REGISTER_NODE (ipsec_output_ip4_node,static) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ipsec_output_ip4_node, ipsec_output_ip4_node_fn)
-     static uword
-       ipsec_output_ip6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
-                                vlib_frame_t * frame)
+VLIB_NODE_FN (ipsec6_output_node) (vlib_main_t * vm,
+                                  vlib_node_runtime_t * node,
+                                  vlib_frame_t * frame)
 {
   return ipsec_output_inline (vm, node, frame, 1);
 }
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ipsec_output_ip6_node,static) = {
-  .function = ipsec_output_ip6_node_fn,
-  .name = "ipsec-output-ip6",
+VLIB_REGISTER_NODE (ipsec6_output_node) = {
+  .name = "ipsec6-output-feature",
   .vector_size = sizeof (u32),
   .format_trace = format_ipsec_output_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
@@ -439,40 +377,4 @@ VLIB_REGISTER_NODE (ipsec_output_ip6_node,static) = {
 #undef _
   },
 };
-/* *INDENT-ON* */
-
-VLIB_NODE_FUNCTION_MULTIARCH (ipsec_output_ip6_node, ipsec_output_ip6_node_fn)
-#else /* IPSEC > 1 */
-
-/* Dummy ipsec output node, in case when IPSec is disabled */
-
-static uword
-ipsec_output_node_fn (vlib_main_t * vm,
-                     vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  clib_warning ("IPSec disabled");
-  return 0;
-}
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ipsec_output_node) = {
-  .vector_size = sizeof (u32),
-  .function = ipsec_output_node_fn,
-  .name = "ipsec-output-ip4",
-};
-
-VLIB_REGISTER_NODE (ipsec_output_node) = {
-  .vector_size = sizeof (u32),
-  .function = ipsec_output_node_fn,
-  .name = "ipsec-output-ip6",
-};
-/* *INDENT-ON* */
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */