ipsec: USE_EXTENDED_SEQ_NUM -> USE_ESN
[vpp.git] / src / plugins / dpdk / ipsec / esp_decrypt.c
index 9377970..47aff17 100644 (file)
@@ -1,10 +1,10 @@
 /*
  * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev
  *
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
+ * You may obtain a copy of the License at:
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
 #include <vnet/ip/ip.h>
 
 #include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
+#include <dpdk/buffer.h>
 #include <dpdk/ipsec/ipsec.h>
-#include <dpdk/ipsec/esp.h>
 #include <dpdk/device/dpdk.h>
 #include <dpdk/device/dpdk_priv.h>
 
 #define foreach_esp_decrypt_next              \
 _(DROP, "error-drop")                         \
-_(IP4_INPUT, "ip4-input")                     \
+_(IP4_INPUT, "ip4-input-no-checksum")         \
 _(IP6_INPUT, "ip6-input")
 
 #define _(v, s) ESP_DECRYPT_NEXT_##v,
-typedef enum {
+typedef enum
+{
   foreach_esp_decrypt_next
 #undef _
-  ESP_DECRYPT_N_NEXT,
+    ESP_DECRYPT_N_NEXT,
 } esp_decrypt_next_t;
 
 #define foreach_esp_decrypt_error               \
@@ -43,75 +45,90 @@ typedef enum {
  _(REPLAY, "SA replayed packet")                \
  _(NOT_IP, "Not IP packet (dropped)")           \
  _(ENQ_FAIL, "Enqueue failed (buffer full)")     \
- _(NO_CRYPTODEV, "Cryptodev not configured")     \
+ _(DISCARD, "Not enough crypto operations, discarding frame")  \
  _(BAD_LEN, "Invalid ciphertext length")         \
- _(UNSUPPORTED, "Cipher/Auth not supported")
+ _(SESSION, "Failed to get crypto session")      \
+ _(NOSUP, "Cipher/Auth not supported")
 
 
-typedef enum {
+typedef enum
+{
 #define _(sym,str) ESP_DECRYPT_ERROR_##sym,
   foreach_esp_decrypt_error
 #undef _
-  ESP_DECRYPT_N_ERROR,
+    ESP_DECRYPT_N_ERROR,
 } esp_decrypt_error_t;
 
-static char * esp_decrypt_error_strings[] = {
+static char *esp_decrypt_error_strings[] = {
 #define _(sym,string) string,
   foreach_esp_decrypt_error
 #undef _
 };
 
-vlib_node_registration_t dpdk_esp_decrypt_node;
+extern vlib_node_registration_t dpdk_esp4_decrypt_node;
+extern vlib_node_registration_t dpdk_esp6_decrypt_node;
 
-typedef struct {
+typedef struct
+{
   ipsec_crypto_alg_t crypto_alg;
   ipsec_integ_alg_t integ_alg;
+  u8 packet_data[64];
 } esp_decrypt_trace_t;
 
 /* packet trace format function */
-static u8 * format_esp_decrypt_trace (u8 * s, va_list * args)
+static u8 *
+format_esp_decrypt_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *);
+  esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *);
+  u32 indent = format_get_indent (s);
 
-  s = format (s, "esp: crypto %U integrity %U",
+  s = format (s, "cipher %U auth %U\n",
              format_ipsec_crypto_alg, t->crypto_alg,
              format_ipsec_integ_alg, t->integ_alg);
+  s = format (s, "%U%U",
+             format_white_space, indent, format_esp_header, t->packet_data);
   return s;
 }
 
-static uword
-dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
-            vlib_node_runtime_t * node,
-            vlib_frame_t * from_frame)
+always_inline uword
+dpdk_esp_decrypt_inline (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame, int is_ip6)
 {
-  u32 n_left_from, *from, *to_next, next_index;
+  u32 n_left_from, *from, *to_next, next_index, thread_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 thread_index = vlib_get_thread_index();
-  dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
-  dpdk_esp_main_t * em = &dpdk_esp_main;
-  u32 i;
+  u32 thread_idx = vlib_get_thread_index ();
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_resource_t *res = 0;
+  ipsec_sa_t *sa0 = 0;
+  crypto_alg_t *cipher_alg = 0, *auth_alg = 0;
+  struct rte_cryptodev_sym_session *session = 0;
+  u32 ret, last_sa_index = ~0;
+  u8 numa = rte_socket_id ();
+  u8 is_aead = 0;
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_idx);
+  struct rte_crypto_op **ops = cwm->ops;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
+  thread_index = vm->thread_index;
 
-  crypto_worker_main_t *cwm =
-    vec_elt_at_index(dcm->workers_main, thread_index);
-  u32 n_qps = vec_len(cwm->qp_data);
-  struct rte_crypto_op ** cops_to_enq[n_qps];
-  u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
-
-  for (i = 0; i < n_qps; i++)
+  ret = crypto_alloc_ops (numa, ops, n_left_from);
+  if (ret)
     {
-      bi_to_enq[i] = cwm->qp_data[i].bi;
-      cops_to_enq[i] = cwm->qp_data[i].cops;
+      if (is_ip6)
+       vlib_node_increment_counter (vm, dpdk_esp6_decrypt_node.index,
+                                    ESP_DECRYPT_ERROR_DISCARD, 1);
+      else
+       vlib_node_increment_counter (vm, dpdk_esp4_decrypt_node.index,
+                                    ESP_DECRYPT_ERROR_DISCARD, 1);
+      /* Discard whole frame */
+      return n_left_from;
     }
 
-  memset(n_cop_qp, 0, n_qps * sizeof(u32));
-
-  crypto_alloc_cops();
-
   next_index = ESP_DECRYPT_NEXT_DROP;
 
   while (n_left_from > 0)
@@ -122,247 +139,266 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {
-         u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size;
-         vlib_buffer_t * b0;
-         esp_header_t * esp0;
-         ipsec_sa_t * sa0;
-         struct rte_mbuf * mb0 = 0;
-         const int BLOCK_SIZE = 16;
-         crypto_sa_session_t * sa_sess;
-         void * sess;
-         u16 qp_index;
-         struct rte_crypto_op * cop = 0;
+         clib_error_t *error;
+         u32 bi0, sa_index0, iv_size;
+         u8 trunc_size;
+         vlib_buffer_t *b0;
+         esp_header_t *esp0;
+         struct rte_mbuf *mb0;
+         struct rte_crypto_op *op;
+         u16 res_idx;
 
          bi0 = from[0];
          from += 1;
          n_left_from -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         mb0 = rte_mbuf_from_vlib_buffer (b0);
          esp0 = vlib_buffer_get_current (b0);
 
-         sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
-         sa0 = pool_elt_at_index (im->sad, sa_index0);
+         /* ih0/ih6_0 */
+         CLIB_PREFETCH (esp0, sizeof (esp0[0]) + 16, LOAD);
+         /* mb0 */
+         CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);
 
-         seq = clib_host_to_net_u32(esp0->seq);
+         op = ops[0];
+         ops += 1;
+         ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
 
-         /* anti-replay check */
-         if (sa0->use_anti_replay)
+         dpdk_op_priv_t *priv = crypto_op_get_priv (op);
+         /* store bi in op private */
+         priv->bi = bi0;
+
+         u16 op_len =
+           sizeof (op[0]) + sizeof (op[0].sym[0]) + sizeof (priv[0]);
+         CLIB_PREFETCH (op, op_len, STORE);
+
+         sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
+         vlib_prefetch_combined_counter (&ipsec_sa_counters,
+                                         thread_index, sa_index0);
+
+         if (sa_index0 != last_sa_index)
            {
-             int rv = 0;
+             sa0 = pool_elt_at_index (im->sad, sa_index0);
 
-             if (PREDICT_TRUE(sa0->use_esn))
-               rv = esp_replay_check_esn(sa0, seq);
-             else
-               rv = esp_replay_check(sa0, seq);
+             cipher_alg =
+               vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
+             auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);
+
+             is_aead = (cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD);
+             if (is_aead)
+               auth_alg = cipher_alg;
 
-             if (PREDICT_FALSE(rv))
+             res_idx = get_resource (cwm, sa0);
+
+             if (PREDICT_FALSE (res_idx == (u16) ~ 0))
                {
-                 clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq);
-                 vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                              ESP_DECRYPT_ERROR_REPLAY, 1);
+                 clib_warning ("unsupported SA by thread index %u",
+                               thread_idx);
+                 if (is_ip6)
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp6_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_NOSUP, 1);
+                 else
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp4_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_NOSUP, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
-           }
-
-         sa0->total_data_size += b0->current_length;
-
-         if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) ||
-                 PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE))
-           {
-             clib_warning ("SPI %u : only cipher + auth supported", sa0->spi);
-             vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                          ESP_DECRYPT_ERROR_UNSUPPORTED, 1);
-             to_next[0] = bi0;
-             to_next += 1;
-             n_left_to_next -= 1;
-             goto trace;
-           }
-
-         sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0);
-
-         if (PREDICT_FALSE(!sa_sess->sess))
-           {
-             int ret = create_sym_sess(sa0, sa_sess, 0);
+             res = vec_elt_at_index (dcm->resource, res_idx);
 
-             if (PREDICT_FALSE (ret))
+             error = crypto_get_session (&session, sa_index0, res, cwm, 0);
+             if (PREDICT_FALSE (error || !session))
                {
+                 clib_warning ("failed to get crypto session");
+                 if (is_ip6)
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp6_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_SESSION,
+                                                1);
+                 else
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp4_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_SESSION,
+                                                1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
-           }
 
-         sess = sa_sess->sess;
-         qp_index = sa_sess->qp_index;
+             last_sa_index = sa_index0;
+           }
 
-         ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
-         cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
-         ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
+         /* anti-replay check */
+         if (ipsec_sa_anti_replay_check (sa0, &esp0->seq))
+           {
+             clib_warning ("failed anti-replay check");
+             if (is_ip6)
+               vlib_node_increment_counter (vm,
+                                            dpdk_esp6_decrypt_node.index,
+                                            ESP_DECRYPT_ERROR_REPLAY, 1);
+             else
+               vlib_node_increment_counter (vm,
+                                            dpdk_esp4_decrypt_node.index,
+                                            ESP_DECRYPT_ERROR_REPLAY, 1);
+             to_next[0] = bi0;
+             to_next += 1;
+             n_left_to_next -= 1;
+             goto trace;
+           }
 
-         cops_to_enq[qp_index][0] = cop;
-         cops_to_enq[qp_index] += 1;
-         n_cop_qp[qp_index] += 1;
-         bi_to_enq[qp_index][0] = bi0;
-         bi_to_enq[qp_index] += 1;
+         if (is_ip6)
+           priv->next = DPDK_CRYPTO_INPUT_NEXT_DECRYPT6_POST;
+         else
+           priv->next = DPDK_CRYPTO_INPUT_NEXT_DECRYPT4_POST;
 
-         rte_crypto_op_attach_sym_session(cop, sess);
+         /* FIXME multi-seg */
+         vlib_increment_combined_counter
+           (&ipsec_sa_counters, thread_index, sa_index0,
+            1, b0->current_length);
 
-         icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
-         iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+         res->ops[res->n_ops] = op;
+         res->bi[res->n_ops] = bi0;
+         res->n_ops += 1;
 
          /* Convert vlib buffer to mbuf */
-         mb0 = rte_mbuf_from_vlib_buffer(b0);
          mb0->data_len = b0->current_length;
          mb0->pkt_len = b0->current_length;
          mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
 
+         trunc_size = auth_alg->trunc_size;
+         iv_size = cipher_alg->iv_len;
+
          /* Outer IP header has already been stripped */
-         u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) -
-             iv_size - icv_size;
+         u16 payload_len =
+           b0->current_length - sizeof (esp_header_t) - iv_size - trunc_size;
 
-         if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0))
+         ASSERT (payload_len >= 4);
+
+         if (payload_len & (cipher_alg->boundary - 1))
            {
              clib_warning ("payload %u not multiple of %d\n",
-                           payload_len, BLOCK_SIZE);
-             vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                          ESP_DECRYPT_ERROR_BAD_LEN, 1);
-             vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1);
-             bi_to_enq[qp_index] -= 1;
-             cops_to_enq[qp_index] -= 1;
-             n_cop_qp[qp_index] -= 1;
+                           payload_len, cipher_alg->boundary);
+             if (is_ip6)
+               vlib_node_increment_counter (vm, dpdk_esp6_decrypt_node.index,
+                                            ESP_DECRYPT_ERROR_BAD_LEN, 1);
+             else
+               vlib_node_increment_counter (vm, dpdk_esp4_decrypt_node.index,
+                                            ESP_DECRYPT_ERROR_BAD_LEN, 1);
+             res->n_ops -= 1;
              to_next[0] = bi0;
              to_next += 1;
              n_left_to_next -= 1;
              goto trace;
            }
 
-         struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
-
-         sym_cop->m_src = mb0;
-         sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size;
-         sym_cop->cipher.data.length = payload_len;
-
-          u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t));
-          dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1);
-
-          if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-            {
-              dpdk_gcm_cnt_blk *icb = &priv->cb;
-              icb->salt = sa0->salt;
-              clib_memcpy(icb->iv, iv, 8);
-              icb->cnt = clib_host_to_net_u32(1);
-              sym_cop->cipher.iv.data = (u8 *)icb;
-              sym_cop->cipher.iv.phys_addr = cop->phys_addr +
-               (uintptr_t)icb - (uintptr_t)cop;
-              sym_cop->cipher.iv.length = 16;
-
-              u8 *aad = priv->aad;
-              clib_memcpy(aad, iv - sizeof(esp_header_t), 8);
-              sym_cop->auth.aad.data = aad;
-              sym_cop->auth.aad.phys_addr = cop->phys_addr +
-                  (uintptr_t)aad - (uintptr_t)cop;
-              if (sa0->use_esn)
-                {
-                  *((u32*)&aad[8]) = sa0->seq_hi;
-                  sym_cop->auth.aad.length = 12;
-                }
-              else
-                {
-                  sym_cop->auth.aad.length = 8;
-                }
-
-              sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*,
-                       rte_pktmbuf_pkt_len(mb0) - icv_size);
-              sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
-                       rte_pktmbuf_pkt_len(mb0) - icv_size);
-              sym_cop->auth.digest.length = icv_size;
-
-            }
-          else
-            {
-              sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*,
-                       sizeof (esp_header_t));
-              sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
-                       sizeof (esp_header_t));
-              sym_cop->cipher.iv.length = iv_size;
-
-              if (sa0->use_esn)
-                {
-                  dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1);
-                  u8* payload_end = rte_pktmbuf_mtod_offset(
-                      mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len);
-
-                  clib_memcpy (priv->icv, payload_end, icv_size);
-                  *((u32*) payload_end) = sa0->seq_hi;
-                  sym_cop->auth.data.offset = 0;
-                  sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size
-                      + payload_len + sizeof(sa0->seq_hi);
-                  sym_cop->auth.digest.data = priv->icv;
-                  sym_cop->auth.digest.phys_addr = cop->phys_addr
-                      + (uintptr_t) priv->icv - (uintptr_t) cop;
-                  sym_cop->auth.digest.length = icv_size;
-                }
-              else
-                {
-                  sym_cop->auth.data.offset = 0;
-                  sym_cop->auth.data.length = sizeof(esp_header_t) +
-                           iv_size + payload_len;
-
-                  sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*,
-                           rte_pktmbuf_pkt_len(mb0) - icv_size);
-                  sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
-                           rte_pktmbuf_pkt_len(mb0) - icv_size);
-                  sym_cop->auth.digest.length = icv_size;
-                }
-            }
-
-trace:
-         if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+         u32 cipher_off, cipher_len;
+         u32 auth_len = 0;
+         u8 *aad = NULL;
+
+         u8 *iv = (u8 *) (esp0 + 1);
+
+         dpdk_gcm_cnt_blk *icb = &priv->cb;
+
+         cipher_off = sizeof (esp_header_t) + iv_size;
+         cipher_len = payload_len;
+
+         u8 *digest = vlib_buffer_get_tail (b0) - trunc_size;
+         u64 digest_paddr =
+           mb0->buf_physaddr + digest - ((u8 *) mb0->buf_addr);
+
+         if (!is_aead && cipher_alg->alg == RTE_CRYPTO_CIPHER_AES_CBC)
+           clib_memcpy_fast (icb, iv, 16);
+         else                  /* CTR/GCM */
+           {
+             u32 *_iv = (u32 *) iv;
+
+             crypto_set_icb (icb, sa0->salt, _iv[0], _iv[1]);
+           }
+
+         if (is_aead)
+           {
+             aad = priv->aad;
+             u32 *_aad = (u32 *) aad;
+             clib_memcpy_fast (aad, esp0, 8);
+
+             /* _aad[3] should always be 0 */
+             if (PREDICT_FALSE (ipsec_sa_is_set_USE_ESN (sa0)))
+               _aad[2] = clib_host_to_net_u32 (sa0->seq_hi);
+             else
+               _aad[2] = 0;
+           }
+         else
            {
-             esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+             auth_len = sizeof (esp_header_t) + iv_size + payload_len;
+
+             if (ipsec_sa_is_set_USE_ESN (sa0))
+               {
+                 clib_memcpy_fast (priv->icv, digest, trunc_size);
+                 u32 *_digest = (u32 *) digest;
+                 _digest[0] = clib_host_to_net_u32 (sa0->seq_hi);
+                 auth_len += sizeof (sa0->seq_hi);
+
+                 digest = priv->icv;
+                 digest_paddr =
+                   op->phys_addr + (uintptr_t) priv->icv - (uintptr_t) op;
+               }
+           }
+
+         crypto_op_setup (is_aead, mb0, op, session, cipher_off, cipher_len,
+                          0, auth_len, aad, digest, digest_paddr);
+       trace:
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             esp_decrypt_trace_t *tr =
+               vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
+             clib_memcpy_fast (tr->packet_data, vlib_buffer_get_current (b0),
+                               sizeof (esp_header_t));
            }
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
-  vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                              ESP_DECRYPT_ERROR_RX_PKTS,
-                              from_frame->n_vectors);
-  crypto_qp_data_t *qpd;
-  /* *INDENT-OFF* */
-  vec_foreach_index (i, cwm->qp_data)
-    {
-      u32 enq;
 
-      qpd = vec_elt_at_index(cwm->qp_data, i);
-      enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
-                                       qpd->cops, n_cop_qp[i]);
-      qpd->inflights += enq;
+  if (is_ip6)
+    {
+      vlib_node_increment_counter (vm, dpdk_esp6_decrypt_node.index,
+                                  ESP_DECRYPT_ERROR_RX_PKTS,
+                                  from_frame->n_vectors);
 
-      if (PREDICT_FALSE(enq < n_cop_qp[i]))
-       {
-         crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
-         vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
+      crypto_enqueue_ops (vm, cwm, dpdk_esp6_decrypt_node.index,
+                         ESP_DECRYPT_ERROR_ENQ_FAIL, numa);
+    }
+  else
+    {
+      vlib_node_increment_counter (vm, dpdk_esp4_decrypt_node.index,
+                                  ESP_DECRYPT_ERROR_RX_PKTS,
+                                  from_frame->n_vectors);
 
-         vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                      ESP_DECRYPT_ERROR_ENQ_FAIL,
-                                      n_cop_qp[i] - enq);
-       }
+      crypto_enqueue_ops (vm, cwm, dpdk_esp4_decrypt_node.index,
+                         ESP_DECRYPT_ERROR_ENQ_FAIL, numa);
     }
-  /* *INDENT-ON* */
+
+  crypto_free_ops (numa, ops, cwm->ops + from_frame->n_vectors - ops);
 
   return from_frame->n_vectors;
 }
 
+VLIB_NODE_FN (dpdk_esp4_decrypt_node) (vlib_main_t * vm,
+                                      vlib_node_runtime_t * node,
+                                      vlib_frame_t * from_frame)
+{
+  return dpdk_esp_decrypt_inline (vm, node, from_frame, 0 /*is_ip6 */ );
+}
+
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = {
-  .function = dpdk_esp_decrypt_node_fn,
-  .name = "dpdk-esp-decrypt",
+VLIB_REGISTER_NODE (dpdk_esp4_decrypt_node) = {
+  .name = "dpdk-esp4-decrypt",
   .vector_size = sizeof (u32),
   .format_trace = format_esp_decrypt_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
@@ -379,7 +415,31 @@ VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn)
+VLIB_NODE_FN (dpdk_esp6_decrypt_node) (vlib_main_t * vm,
+                                      vlib_node_runtime_t * node,
+                                      vlib_frame_t * from_frame)
+{
+  return dpdk_esp_decrypt_inline (vm, node, from_frame, 1 /*is_ip6 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp6_decrypt_node) = {
+  .name = "dpdk-esp6-decrypt",
+  .vector_size = sizeof (u32),
+  .format_trace = format_esp_decrypt_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
+  .error_strings = esp_decrypt_error_strings,
+
+  .n_next_nodes = ESP_DECRYPT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
+    foreach_esp_decrypt_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
 
 /*
  * Decrypt Post Node
@@ -388,36 +448,56 @@ VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn)
 #define foreach_esp_decrypt_post_error       \
  _(PKTS, "ESP post pkts")
 
-typedef enum {
+typedef enum
+{
 #define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym,
   foreach_esp_decrypt_post_error
 #undef _
-  ESP_DECRYPT_POST_N_ERROR,
+    ESP_DECRYPT_POST_N_ERROR,
 } esp_decrypt_post_error_t;
 
-static char * esp_decrypt_post_error_strings[] = {
+static char *esp_decrypt_post_error_strings[] = {
 #define _(sym,string) string,
   foreach_esp_decrypt_post_error
 #undef _
 };
 
-vlib_node_registration_t dpdk_esp_decrypt_post_node;
+extern vlib_node_registration_t dpdk_esp4_decrypt_post_node;
+extern vlib_node_registration_t dpdk_esp6_decrypt_post_node;
 
-static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args)
+static u8 *
+format_esp_decrypt_post_trace (u8 * s, va_list * args)
 {
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *);
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "cipher %U auth %U\n",
+             format_ipsec_crypto_alg, t->crypto_alg,
+             format_ipsec_integ_alg, t->integ_alg);
+
+  ip4_header_t *ih4 = (ip4_header_t *) t->packet_data;
+  if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
+    s =
+      format (s, "%U%U", format_white_space, indent, format_ip6_header, ih4);
+  else
+    s =
+      format (s, "%U%U", format_white_space, indent, format_ip4_header, ih4);
+
   return s;
 }
 
-static uword
-dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
-            vlib_node_runtime_t * node,
-            vlib_frame_t * from_frame)
+always_inline uword
+dpdk_esp_decrypt_post_inline (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * from_frame, int is_ip6)
 {
   u32 n_left_from, *from, *to_next = 0, next_index;
-  ipsec_sa_t * sa0;
+  ipsec_sa_t *sa0;
   u32 sa_index0 = ~0;
   ipsec_main_t *im = &ipsec_main;
-  dpdk_esp_main_t *em = &dpdk_esp_main;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -432,13 +512,15 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {
-         esp_footer_t * f0;
-         u32 bi0, next0, icv_size, iv_size;
-         vlib_buffer_t * b0 = 0;
+         esp_footer_t *f0;
+         u32 bi0, iv_size, next0;
+         vlib_buffer_t *b0 = 0;
          ip4_header_t *ih4 = 0, *oh4 = 0;
          ip6_header_t *ih6 = 0, *oh6 = 0;
-         u8 tunnel_mode = 1;
-         u8 transport_ip6 = 0;
+         crypto_alg_t *cipher_alg, *auth_alg;
+         esp_header_t *esp0;
+         u8 trunc_size, is_aead;
+         u16 udp_encap_adv = 0;
 
          next0 = ESP_DECRYPT_NEXT_DROP;
 
@@ -448,130 +530,166 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
          n_left_to_next -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         esp0 = vlib_buffer_get_current (b0);
 
-         sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
+         sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
          sa0 = pool_elt_at_index (im->sad, sa_index0);
 
          to_next[0] = bi0;
          to_next += 1;
 
-         icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
-         iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+         cipher_alg = vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
+         auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);
+         is_aead = cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD;
+         if (is_aead)
+           auth_alg = cipher_alg;
 
-         if (sa0->use_anti_replay)
+         trunc_size = auth_alg->trunc_size;
+
+         iv_size = cipher_alg->iv_len;
+
+         ipsec_sa_anti_replay_advance (sa0, &esp0->seq);
+
+         /* if UDP encapsulation is used adjust the address of the IP header */
+         if (ipsec_sa_is_set_UDP_ENCAP (sa0)
+             && (b0->flags & VNET_BUFFER_F_IS_IP4))
            {
-             esp_header_t * esp0 = vlib_buffer_get_current (b0);
-             u32 seq;
-             seq = clib_host_to_net_u32(esp0->seq);
-             if (PREDICT_TRUE(sa0->use_esn))
-               esp_replay_advance_esn(sa0, seq);
-             else
-               esp_replay_advance(sa0, seq);
+             udp_encap_adv = sizeof (udp_header_t);
            }
 
-         ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t));
+         if (b0->flags & VNET_BUFFER_F_IS_IP4)
+           ih4 = (ip4_header_t *)
+             ((u8 *) esp0 - udp_encap_adv - sizeof (ip4_header_t));
+         else
+           ih4 = (ip4_header_t *) ((u8 *) esp0 - sizeof (ip6_header_t));
+
          vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size);
 
-         b0->current_length -= (icv_size + 2);
          b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-         f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) +
-                                b0->current_length);
-         b0->current_length -= f0->pad_length;
-
-         /* transport mode */
-         if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6))
+         f0 = (esp_footer_t *) (vlib_buffer_get_tail (b0) - trunc_size - 2);
+         b0->current_length -= (f0->pad_length + trunc_size + 2);
+#if 0
+         /* check padding */
+         const u8 *padding = vlib_buffer_get_tail (b0);
+         if (PREDICT_FALSE (memcmp (padding, pad_data, f0->pad_length)))
            {
-             tunnel_mode = 0;
-
-             if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40))
-               {
-                 if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60))
-                   transport_ip6 = 1;
-                 else
-                   {
-                     clib_warning("next header: 0x%x", f0->next_header);
-                     vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                                  ESP_DECRYPT_ERROR_NOT_IP, 1);
-                     goto trace;
-                   }
-               }
+             clib_warning ("bad padding");
+             vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                          ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                          1);
+             goto trace;
            }
-
-         if (PREDICT_TRUE (tunnel_mode))
+#endif
+         if (ipsec_sa_is_set_IS_TUNNEL (sa0))
            {
-             if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP))
+             if (f0->next_header == IP_PROTOCOL_IP_IN_IP)
                next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
-             else if (f0->next_header == IP_PROTOCOL_IPV6)
+             else if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)
+                      && f0->next_header == IP_PROTOCOL_IPV6)
                next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
              else
                {
-                 clib_warning("next header: 0x%x", f0->next_header);
-                 vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                              ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
-                                              1);
+                 clib_warning ("next header: 0x%x", f0->next_header);
+                 if (is_ip6)
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp6_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                                1);
+                 else
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp4_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                                1);
                  goto trace;
                }
            }
-         /* transport mode */
-         else
+         else                  /* transport mode */
            {
-             if (PREDICT_FALSE(transport_ip6))
+             if ((ih4->ip_version_and_header_length & 0xF0) == 0x40)
+               {
+                 u16 ih4_len = ip4_header_bytes (ih4);
+                 vlib_buffer_advance (b0, -ih4_len - udp_encap_adv);
+                 next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+                 if (!ipsec_sa_is_set_UDP_ENCAP (sa0))
+                   {
+                     oh4 = vlib_buffer_get_current (b0);
+                     memmove (oh4, ih4, ih4_len);
+                     oh4->protocol = f0->next_header;
+                     oh4->length = clib_host_to_net_u16 (b0->current_length);
+                     oh4->checksum = ip4_header_checksum (oh4);
+                   }
+               }
+             else if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
                {
-                 ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t));
-                 vlib_buffer_advance (b0, -sizeof(ip6_header_t));
+                 ih6 = (ip6_header_t *) ih4;
+                 vlib_buffer_advance (b0, -sizeof (ip6_header_t));
                  oh6 = vlib_buffer_get_current (b0);
-                 memmove(oh6, ih6, sizeof(ip6_header_t));
+                 memmove (oh6, ih6, sizeof (ip6_header_t));
 
                  next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
                  oh6->protocol = f0->next_header;
-                 oh6->payload_length =
-                     clib_host_to_net_u16 (
-                         vlib_buffer_length_in_chain(vm, b0) -
-                         sizeof (ip6_header_t));
+                 u16 len = b0->current_length - sizeof (ip6_header_t);
+                 oh6->payload_length = clib_host_to_net_u16 (len);
                }
              else
                {
-                 vlib_buffer_advance (b0, -sizeof(ip4_header_t));
-                 oh4 = vlib_buffer_get_current (b0);
-                 memmove(oh4, ih4, sizeof(ip4_header_t));
-
-                 next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
-                 oh4->ip_version_and_header_length = 0x45;
-                 oh4->fragment_id = 0;
-                 oh4->flags_and_fragment_offset = 0;
-                 oh4->protocol = f0->next_header;
-                 oh4->length = clib_host_to_net_u16 (
-                     vlib_buffer_length_in_chain (vm, b0));
-                 oh4->checksum = ip4_header_checksum (oh4);
+                 clib_warning ("next header: 0x%x", f0->next_header);
+                 if (is_ip6)
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp6_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                                1);
+                 else
+                   vlib_node_increment_counter (vm,
+                                                dpdk_esp4_decrypt_node.index,
+                                                ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                                1);
+                 goto trace;
                }
            }
 
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;
+         vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
 
-trace:
-         if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+       trace:
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
-             esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+             esp_decrypt_trace_t *tr =
+               vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
+             ih4 = vlib_buffer_get_current (b0);
+             clib_memcpy_fast (tr->packet_data, ih4, sizeof (ip6_header_t));
            }
 
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next, bi0, next0);
+                                          to_next, n_left_to_next, bi0,
+                                          next0);
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
-  vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index,
-                              ESP_DECRYPT_POST_ERROR_PKTS,
-                              from_frame->n_vectors);
+
+  if (is_ip6)
+    vlib_node_increment_counter (vm, dpdk_esp6_decrypt_post_node.index,
+                                ESP_DECRYPT_POST_ERROR_PKTS,
+                                from_frame->n_vectors);
+  else
+    vlib_node_increment_counter (vm, dpdk_esp4_decrypt_post_node.index,
+                                ESP_DECRYPT_POST_ERROR_PKTS,
+                                from_frame->n_vectors);
 
   return from_frame->n_vectors;
 }
 
+VLIB_NODE_FN (dpdk_esp4_decrypt_post_node) (vlib_main_t * vm,
+                                           vlib_node_runtime_t * node,
+                                           vlib_frame_t * from_frame)
+{                              /* IPv4 post-decrypt node: runs the shared inline body; is_ip6 = 0 makes it charge its counters to the dpdk_esp4_* nodes */
+  return dpdk_esp_decrypt_post_inline (vm, node, from_frame, 0 /*is_ip6 */ );
+}
+
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = {
-  .function = dpdk_esp_decrypt_post_node_fn,
-  .name = "dpdk-esp-decrypt-post",
+VLIB_REGISTER_NODE (dpdk_esp4_decrypt_post_node) = {
+  .name = "dpdk-esp4-decrypt-post",
   .vector_size = sizeof (u32),
   .format_trace = format_esp_decrypt_post_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
@@ -588,4 +706,36 @@ VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn)
+VLIB_NODE_FN (dpdk_esp6_decrypt_post_node) (vlib_main_t * vm,
+                                           vlib_node_runtime_t * node,
+                                           vlib_frame_t * from_frame)
+{                              /* IPv6 post-decrypt node: runs the shared inline body; is_ip6 must be 1 so counters are charged to the dpdk_esp6_* nodes, not the esp4 ones */
+  return dpdk_esp_decrypt_post_inline (vm, node, from_frame, 1 /*is_ip6 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_esp6_decrypt_post_node) = {  /* registration of the IPv6 post-decrypt graph node */
+  .name = "dpdk-esp6-decrypt-post",
+  .vector_size = sizeof (u32),
+  .format_trace = format_esp_decrypt_post_trace,  /* same trace formatter the esp4 post node registers */
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings),
+  .error_strings = esp_decrypt_post_error_strings,
+
+  .n_next_nodes = ESP_DECRYPT_N_NEXT,
+  .next_nodes = {  /* one entry per foreach_esp_decrypt_next item, indexed by ESP_DECRYPT_NEXT_<item> */
+#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
+    foreach_esp_decrypt_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */