IPSEC: tidy the policy types
[vpp.git] / src / vnet / ipsec / ipsec_if_in.c
index 655668a..b12e36c 100644 (file)
 
 #include <vnet/ipsec/ipsec.h>
 #include <vnet/ipsec/esp.h>
+#include <vnet/ipsec/ipsec_io.h>
 
 /* Statistics (not really errors) */
 #define foreach_ipsec_if_input_error                             \
 _(RX, "good packets received")                                   \
-_(DISABLED, "ipsec packets received on disabled interface")
+_(DISABLED, "ipsec packets received on disabled interface")       \
+_(NO_TUNNEL, "no matching tunnel")
 
 static char *ipsec_if_input_error_strings[] = {
 #define _(sym,string) string,
@@ -48,7 +50,7 @@ typedef struct
   u32 seq;
 } ipsec_if_input_trace_t;
 
-u8 *
+static u8 *
 format_ipsec_if_input_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -59,152 +61,368 @@ format_ipsec_if_input_trace (u8 * s, va_list * args)
   return s;
 }
 
-static uword
-ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
-                       vlib_frame_t * from_frame)
+
+always_inline uword
+ipsec_if_input_inline (vlib_main_t * vm,
+                      vlib_node_runtime_t * node, vlib_frame_t * from_frame)
 {
   ipsec_main_t *im = &ipsec_main;
   vnet_main_t *vnm = im->vnet_main;
   vnet_interface_main_t *vim = &vnm->interface_main;
-  ipsec_proto_main_t *em = &ipsec_proto_main;
-  u32 *from, *to_next = 0, next_index;
-  u32 n_left_from, last_sw_if_index = ~0;
-  u32 thread_index = vlib_get_thread_index ();
+
+  int is_trace = node->flags & VLIB_NODE_FLAG_TRACE;
+  u32 thread_index = vm->thread_index;
+
+  u32 n_left_from, *from;
+  u16 nexts[VLIB_FRAME_SIZE], *next;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  vlib_get_buffers (vm, from, bufs, n_left_from);
+  b = bufs;
+  next = nexts;
+
+  clib_memset_u16 (nexts, im->esp4_decrypt_next_index, n_left_from);
+
   u64 n_bytes = 0, n_packets = 0;
-  u8 icv_len;
-  ipsec_tunnel_if_t *last_t = NULL;
-  ipsec_sa_t *sa0;
+  u32 n_disabled = 0, n_no_tunnel = 0;
+
+  u32 last_sw_if_index = ~0;
+  u32 last_tunnel_id = ~0;
+  u64 last_key = ~0;
+
   vlib_combined_counter_main_t *rx_counter;
   vlib_combined_counter_main_t *drop_counter;
-  u32 n_disabled = 0;
 
   rx_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
   drop_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
 
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
+  while (n_left_from >= 2)
     {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, next0, sw_if_index0;
-         vlib_buffer_t *b0;
-         ip4_header_t *ip0;
-         esp_header_t *esp0;
-         uword *p;
-         u32 len0;
-
-         bi0 = to_next[0] = from[0];
-         from += 1;
-         n_left_from -= 1;
-         to_next += 1;
-         n_left_to_next -= 1;
-         b0 = vlib_get_buffer (vm, bi0);
-         ip0 = vlib_buffer_get_current (b0);
-         esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+      u32 sw_if_index0, sw_if_index1;
+      ip4_header_t *ip0, *ip1;
+      esp_header_t *esp0, *esp1;
+      u32 len0, len1;
+      u16 buf_adv0, buf_adv1;
+      u32 tid0, tid1;
+      ipsec_tunnel_if_t *t0, *t1;
+      u64 key0, key1;
+
+      if (n_left_from >= 4)
+       {
+         CLIB_PREFETCH (b[2], CLIB_CACHE_LINE_BYTES, STORE);
+         CLIB_PREFETCH (b[2]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         CLIB_PREFETCH (b[3], CLIB_CACHE_LINE_BYTES, STORE);
+         CLIB_PREFETCH (b[3]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       }
+
+      ip0 = (ip4_header_t *) (b[0]->data + vnet_buffer (b[0])->l3_hdr_offset);
+      ip1 = (ip4_header_t *) (b[1]->data + vnet_buffer (b[1])->l3_hdr_offset);
 
-         next0 = IPSEC_INPUT_NEXT_DROP;
+      /* NAT UDP port 4500 case, don't advance any more */
+      if (ip0->protocol == IP_PROTOCOL_UDP)
+       {
+         esp0 =
+           (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0) +
+                             sizeof (udp_header_t));
+         buf_adv0 = 0;
+       }
+      else
+       {
+         esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+         buf_adv0 = ip4_header_bytes (ip0);
+       }
+      if (ip1->protocol == IP_PROTOCOL_UDP)
+       {
+         esp1 =
+           (esp_header_t *) ((u8 *) ip1 + ip4_header_bytes (ip1) +
+                             sizeof (udp_header_t));
+         buf_adv1 = 0;
+       }
+      else
+       {
+         esp1 = (esp_header_t *) ((u8 *) ip1 + ip4_header_bytes (ip1));
+         buf_adv1 = ip4_header_bytes (ip1);
+       }
 
-         u64 key = (u64) ip0->src_address.as_u32 << 32 |
-           (u64) clib_net_to_host_u32 (esp0->spi);
+      vlib_buffer_advance (b[0], buf_adv0);
+      vlib_buffer_advance (b[1], buf_adv1);
 
-         p = hash_get (im->ipsec_if_pool_index_by_key, key);
+      len0 = vlib_buffer_length_in_chain (vm, b[0]);
+      len1 = vlib_buffer_length_in_chain (vm, b[1]);
 
-         len0 = vlib_buffer_length_in_chain (vm, b0);
+      key0 = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi;
+      key1 = (u64) ip1->src_address.as_u32 << 32 | (u64) esp1->spi;
 
+      if (key0 == last_key)
+       {
+         tid0 = last_tunnel_id;
+       }
+      else
+       {
+         uword *p = hash_get (im->ipsec_if_pool_index_by_key, key0);
          if (p)
            {
-             ipsec_tunnel_if_t *t;
-             t = pool_elt_at_index (im->tunnel_interfaces, p[0]);
-             vnet_buffer (b0)->ipsec.sad_index = t->input_sa_index;
-             if (t->hw_if_index != ~0)
+             tid0 = p[0];
+             last_tunnel_id = tid0;
+             last_key = key0;
+           }
+         else
+           {
+             n_no_tunnel++;
+             next[0] = IPSEC_INPUT_NEXT_DROP;
+             goto pkt1;
+           }
+       }
+
+      t0 = pool_elt_at_index (im->tunnel_interfaces, tid0);
+      vnet_buffer (b[0])->ipsec.sad_index = t0->input_sa_index;
+
+      if (PREDICT_TRUE (t0->hw_if_index != ~0))
+       {
+         vnet_buffer (b[0])->ipsec.flags = 0;
+         sw_if_index0 = t0->sw_if_index;
+         vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
+
+         if (PREDICT_FALSE (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)))
+           {
+             vlib_increment_combined_counter
+               (drop_counter, thread_index, sw_if_index0, 1, len0);
+             n_disabled++;
+             next[0] = IPSEC_INPUT_NEXT_DROP;
+             goto pkt1;
+           }
+
+         if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index))
+           {
+             n_packets++;
+             n_bytes += len0;
+           }
+         else
+           {
+             if (n_packets)
                {
-                 vnet_hw_interface_t *hi;
-
-                 vnet_buffer (b0)->ipsec.flags = 0;
-                 hi = vnet_get_hw_interface (vnm, t->hw_if_index);
-                 sw_if_index0 = hi->sw_if_index;
-                 vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
-
-                 if (PREDICT_FALSE
-                     (!(hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)))
-                   {
-                     vlib_increment_combined_counter
-                       (drop_counter, thread_index, sw_if_index0, 1, len0);
-                     b0->error = node->errors[IPSEC_IF_INPUT_ERROR_DISABLED];
-                     n_disabled++;
-                     goto trace;
-                   }
-
-                 if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index))
-                   {
-                     n_packets++;
-                     n_bytes += len0;
-                   }
-                 else
-                   {
-                     sa0 = pool_elt_at_index (im->sad, t->input_sa_index);
-                     icv_len =
-                       em->ipsec_proto_main_integ_algs[sa0->
-                                                       integ_alg].trunc_size;
-
-                     /* length = packet length - ESP/tunnel overhead */
-                     n_bytes -= n_packets * (sizeof (ip4_header_t) +
-                                             sizeof (esp_header_t) +
-                                             sizeof (esp_footer_t) +
-                                             16 /* aes-cbc IV */  + icv_len);
-
-                     if (last_t)
-                       {
-                         vlib_increment_combined_counter
-                           (rx_counter, thread_index, sw_if_index0,
-                            n_packets, n_bytes);
-                       }
-
-                     last_sw_if_index = sw_if_index0;
-                     last_t = t;
-                     n_packets = 1;
-                     n_bytes = len0;
-                   }
+                 vlib_increment_combined_counter
+                   (rx_counter, thread_index, last_sw_if_index,
+                    n_packets, n_bytes);
                }
-             else
+
+             last_sw_if_index = sw_if_index0;
+             n_packets = 1;
+             n_bytes = len0;
+           }
+       }
+      else
+       {
+         vnet_buffer (b[0])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL;
+       }
+
+    pkt1:
+      if (key1 == last_key)
+       {
+         tid1 = last_tunnel_id;
+       }
+      else
+       {
+         uword *p = hash_get (im->ipsec_if_pool_index_by_key, key1);
+         if (p)
+           {
+             tid1 = p[0];
+             last_tunnel_id = tid1;
+             last_key = key1;
+           }
+         else
+           {
+             n_no_tunnel++;
+             next[1] = IPSEC_INPUT_NEXT_DROP;
+             goto trace1;
+           }
+       }
+
+      t1 = pool_elt_at_index (im->tunnel_interfaces, tid1);
+      vnet_buffer (b[1])->ipsec.sad_index = t1->input_sa_index;
+
+      if (PREDICT_TRUE (t1->hw_if_index != ~0))
+       {
+         vnet_buffer (b[1])->ipsec.flags = 0;
+         sw_if_index1 = t1->sw_if_index;
+         vnet_buffer (b[1])->sw_if_index[VLIB_RX] = sw_if_index1;
+
+         if (PREDICT_FALSE (!(t1->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)))
+           {
+             vlib_increment_combined_counter
+               (drop_counter, thread_index, sw_if_index1, 1, len1);
+             n_disabled++;
+             next[1] = IPSEC_INPUT_NEXT_DROP;
+             goto trace1;
+           }
+
+         if (PREDICT_TRUE (sw_if_index1 == last_sw_if_index))
+           {
+             n_packets++;
+             n_bytes += len1;
+           }
+         else
+           {
+             if (n_packets)
                {
-                 vnet_buffer (b0)->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL;
+                 vlib_increment_combined_counter
+                   (rx_counter, thread_index, last_sw_if_index,
+                    n_packets, n_bytes);
                }
 
-             vlib_buffer_advance (b0, ip4_header_bytes (ip0));
-             next0 = im->esp_decrypt_next_index;
+             last_sw_if_index = sw_if_index1;
+             n_packets = 1;
+             n_bytes = len1;
            }
+       }
+      else
+       {
+         vnet_buffer (b[1])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL;
+       }
 
-       trace:
-         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+    trace1:
+      if (is_trace)
+       {
+         if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
            {
              ipsec_if_input_trace_t *tr =
-               vlib_add_trace (vm, node, b0, sizeof (*tr));
+               vlib_add_trace (vm, node, b[0], sizeof (*tr));
              tr->spi = clib_host_to_net_u32 (esp0->spi);
              tr->seq = clib_host_to_net_u32 (esp0->seq);
            }
+         if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             ipsec_if_input_trace_t *tr =
+               vlib_add_trace (vm, node, b[1], sizeof (*tr));
+             tr->spi = clib_host_to_net_u32 (esp1->spi);
+             tr->seq = clib_host_to_net_u32 (esp1->seq);
+           }
+       }
 
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, bi0, next0);
+      /* next */
+      b += 2;
+      next += 2;
+      n_left_from -= 2;
+    }
+  while (n_left_from > 0)
+    {
+      u32 sw_if_index0;
+      ip4_header_t *ip0;
+      esp_header_t *esp0;
+      u32 len0;
+      u16 buf_adv0;
+      u32 tid0;
+      ipsec_tunnel_if_t *t0;
+      u64 key0;
+
+      ip0 = (ip4_header_t *) (b[0]->data + vnet_buffer (b[0])->l3_hdr_offset);
+
+      /* NAT UDP port 4500 case, don't advance any more */
+      if (ip0->protocol == IP_PROTOCOL_UDP)
+       {
+         esp0 =
+           (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0) +
+                             sizeof (udp_header_t));
+         buf_adv0 = 0;
        }
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      else
+       {
+         esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+         buf_adv0 = ip4_header_bytes (ip0);
+       }
+
+      /* stats for the tunnel include all the data after the IP header
+         just like a normal IP-IP tunnel */
+      vlib_buffer_advance (b[0], buf_adv0);
+      len0 = vlib_buffer_length_in_chain (vm, b[0]);
+
+      key0 = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi;
+      if (key0 == last_key)
+       {
+         tid0 = last_tunnel_id;
+       }
+      else
+       {
+         uword *p = hash_get (im->ipsec_if_pool_index_by_key, key0);
+         if (p)
+           {
+             tid0 = p[0];
+             last_tunnel_id = tid0;
+             last_key = key0;
+           }
+         else
+           {
+             n_no_tunnel++;
+             next[0] = IPSEC_INPUT_NEXT_DROP;
+             goto trace00;
+           }
+       }
+
+      t0 = pool_elt_at_index (im->tunnel_interfaces, tid0);
+      vnet_buffer (b[0])->ipsec.sad_index = t0->input_sa_index;
+
+      if (PREDICT_TRUE (t0->hw_if_index != ~0))
+       {
+         vnet_buffer (b[0])->ipsec.flags = 0;
+         sw_if_index0 = t0->sw_if_index;
+         vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
+
+         if (PREDICT_FALSE (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)))
+           {
+             vlib_increment_combined_counter
+               (drop_counter, thread_index, sw_if_index0, 1, len0);
+             n_disabled++;
+             next[0] = IPSEC_INPUT_NEXT_DROP;
+             goto trace00;
+           }
+
+         if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index))
+           {
+             n_packets++;
+             n_bytes += len0;
+           }
+         else
+           {
+             if (n_packets)
+               {
+                 vlib_increment_combined_counter
+                   (rx_counter, thread_index, last_sw_if_index,
+                    n_packets, n_bytes);
+               }
+
+             last_sw_if_index = sw_if_index0;
+             n_packets = 1;
+             n_bytes = len0;
+           }
+       }
+      else
+       {
+         vnet_buffer (b[0])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL;
+       }
+
+    trace00:
+      if (is_trace)
+       {
+         if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             ipsec_if_input_trace_t *tr =
+               vlib_add_trace (vm, node, b[0], sizeof (*tr));
+             tr->spi = clib_host_to_net_u32 (esp0->spi);
+             tr->seq = clib_host_to_net_u32 (esp0->seq);
+           }
+       }
+
+      /* next */
+      b += 1;
+      next += 1;
+      n_left_from -= 1;
     }
 
-  if (last_t)
+  if (n_packets)
     {
-      sa0 = pool_elt_at_index (im->sad, last_t->input_sa_index);
-      icv_len = em->ipsec_proto_main_integ_algs[sa0->integ_alg].trunc_size;
-
-      n_bytes -= n_packets * (sizeof (ip4_header_t) + sizeof (esp_header_t) +
-                             sizeof (esp_footer_t) + 16 /* aes-cbc IV */  +
-                             icv_len);
       vlib_increment_combined_counter (rx_counter,
                                       thread_index,
                                       last_sw_if_index, n_packets, n_bytes);
@@ -213,29 +431,35 @@ ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_node_increment_counter (vm, ipsec_if_input_node.index,
                               IPSEC_IF_INPUT_ERROR_RX,
                               from_frame->n_vectors - n_disabled);
-
   vlib_node_increment_counter (vm, ipsec_if_input_node.index,
                               IPSEC_IF_INPUT_ERROR_DISABLED, n_disabled);
+  vlib_node_increment_counter (vm, ipsec_if_input_node.index,
+                              IPSEC_IF_INPUT_ERROR_NO_TUNNEL, n_no_tunnel);
+
+  vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
 
   return from_frame->n_vectors;
 }
 
+VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * from_frame)
+{
+  return ipsec_if_input_inline (vm, node, from_frame);
+}
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ipsec_if_input_node) = {
-  .function = ipsec_if_input_node_fn,
   .name = "ipsec-if-input",
   .vector_size = sizeof (u32),
   .format_trace = format_ipsec_if_input_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-
   .n_errors = ARRAY_LEN(ipsec_if_input_error_strings),
   .error_strings = ipsec_if_input_error_strings,
-
-  .sibling_of = "ipsec-input-ip4",
+  .sibling_of = "ipsec4-input-feature",
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ipsec_if_input_node, ipsec_if_input_node_fn)
 /*
  * fd.io coding-style-patch-verification: ON
  *