dpdk/ipsec: rework plus improved cli commands 18/8518/5
authorSergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
Sat, 26 Aug 2017 14:22:05 +0000 (15:22 +0100)
committerDamjan Marion <dmarion.lists@gmail.com>
Thu, 5 Oct 2017 09:54:34 +0000 (09:54 +0000)
This patch reworks the DPDK IPsec implementation, including the cryptodev
management, and replaces the existing CLI commands with new ones for better
usability.

For the data path:
 - The dpdk-esp-encrypt-post node is no longer needed.
 - IPv4 packets in the decrypt path are sent to ip4-input-no-checksum instead
 of ip4-input, avoiding a redundant IPv4 header checksum verification.

The DPDK cryptodev CLI commands are replaced by the following new commands
(an illustrative session follows the list):
 - show dpdk crypto devices
 - show dpdk crypto placement [verbose]
 - set dpdk crypto placement (<device> <thread> | auto)
 - clear dpdk crypto placement <device> [<thread>]
 - show dpdk crypto pools
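
An illustrative session, assuming an AES-NI multi-buffer PMD is present
(output shapes follow the CLI doc strings in this patch; device names,
indices and counts depend on the system):

  vpp# show dpdk crypto devices
  cryptodev_aesni_mb_pmd   crypto_aesni_mb     down
    numa_node 1, max_queues 8
    free_resources 2, used_resources 2
    ...
  vpp# set dpdk crypto placement auto
  vpp# show dpdk crypto placement
  Thread 1 (vpp_wk_0):
    cryptodev_aesni_mb_p dev-id  0 inbound-queue  0 outbound-queue  1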

Change-Id: I47324517ede82d3e6e0e9f9c71c1a3433714b27b
Signed-off-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
15 files changed:
dpdk/Makefile
src/plugins/dpdk.am
src/plugins/dpdk/ipsec/cli.c
src/plugins/dpdk/ipsec/crypto_node.c
src/plugins/dpdk/ipsec/esp.h [deleted file]
src/plugins/dpdk/ipsec/esp_decrypt.c
src/plugins/dpdk/ipsec/esp_encrypt.c
src/plugins/dpdk/ipsec/ipsec.c
src/plugins/dpdk/ipsec/ipsec.h
src/vnet.am
src/vnet/ipsec/esp.h
src/vnet/ipsec/esp_format.c [new file with mode: 0644]
src/vnet/ipsec/ipsec.c
src/vnet/ipsec/ipsec.h
src/vnet/ipsec/ipsec_if.c

index 4e0ad4f..0734750 100644 (file)
@@ -146,7 +146,6 @@ JOBS := $(if $(shell [ -f /proc/cpuinfo ] && head /proc/cpuinfo),\
 
 # compiler/linker custom arguments
 DPDK_CPU_CFLAGS := -pie -fPIC
-DPDK_EXTRA_LDFLAGS := -g
 
 ifeq ($(DPDK_DEBUG),n)
 DPDK_EXTRA_CFLAGS := -g -mtune=$(DPDK_TUNE)
@@ -297,7 +296,8 @@ config: $(B)/.config.ok
 build-ipsec-mb:
        mkdir -p $(I)/lib/
        # Do not build GCM stuff if we are building ISA_L
-       make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=$(ISA_L_CRYPTO_LIB)
+       make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=$(ISA_L_CRYPTO_LIB) \
+         DEBUG=$(DPDK_DEBUG) EXTRA_CFLAGS=-fPIC
        cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/
 
 .PHONY: build-isal-crypto
index 15195a2..905ba20 100644 (file)
@@ -61,11 +61,8 @@ API_FILES += dpdk/api/dpdk.api
 
 nobase_include_HEADERS +=                                      \
   dpdk/device/dpdk.h                                           \
-  dpdk/api/dpdk_all_api_h.h
-
-nobase_include_HEADERS +=                      \
- dpdk/ipsec/ipsec.h            \
- dpdk/ipsec/esp.h
+  dpdk/api/dpdk_all_api_h.h                                    \
+  dpdk/ipsec/ipsec.h
 
 dpdk_test_plugin_la_SOURCES =                                  \
   dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h
index a9cf250..2dcfe1d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
 #include <dpdk/device/dpdk.h>
 #include <dpdk/ipsec/ipsec.h>
 
-static void
-dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
+static u8 *
+format_crypto (u8 * s, va_list * args)
 {
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
-  u32 i, skip_master;
+  crypto_dev_t *dev = va_arg (*args, crypto_dev_t *);
+  crypto_drv_t *drv = vec_elt_at_index (dcm->drv, dev->drv_id);
+  u64 feat, mask;
+  u32 i;
+  i8 *pre = "  ";
 
-  if (!dcm->enabled)
+  s = format (s, "%-25s%-20s%-10s\n", dev->name, drv->name,
+             rte_cryptodevs[dev->id].data->dev_started ? "up" : "down");
+  s = format (s, "  numa_node %u, max_queues %u\n", dev->numa, dev->max_qp);
+  s = format (s, "  free_resources %u, used_resources %u\n",
+             vec_len (dev->free_resources), vec_len (dev->used_resources));
+
+  if (dev->features)
     {
-      vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n");
-      return;
+      for (mask = 1; mask != 0; mask <<= 1)
+       {
+         feat = dev->features & mask;
+         if (feat)
+           {
+             s =
+               format (s, "%s%s", pre,
+                       rte_cryptodev_get_feature_name (feat));
+             pre = ", ";
+           }
+       }
+      s = format (s, "\n");
     }
 
-  if (detail_display)
-    vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n",
-                    "cipher", "auth");
-  else
-    vlib_cli_output (vm, "worker\tcrypto device id(type)\n");
+  s = format (s, "  Cipher:");
+  pre = " ";
+  for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+    if (dev->cipher_support[i])
+      {
+       s = format (s, "%s%s", pre, dcm->cipher_algs[i].name);
+       pre = ", ";
+      }
+  s = format (s, "\n");
 
-  skip_master = vlib_num_workers () > 0;
+  s = format (s, "  Auth:");
+  pre = " ";
+  for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+    if (dev->auth_support[i])
+      {
+       s = format (s, "%s%s", pre, dcm->auth_algs[i].name);
+       pre = ", ";
+      }
+  s = format (s, "\n\n");
 
-  for (i = 0; i < tm->n_vlib_mains; i++)
-    {
-      uword key, data;
-      u32 thread_index = vlib_mains[i]->thread_index;
-      crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
-      u8 *s = 0;
+  return s;
+}
 
-      if (skip_master)
-       {
-         skip_master = 0;
-         continue;
-       }
+static clib_error_t *
+show_dpdk_crypto_fn (vlib_main_t * vm, unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_dev_t *dev;
 
-      if (!detail_display)
-       {
-         i32 last_cdev = -1;
-         crypto_qp_data_t *qpd;
+  /* *INDENT-OFF* */
+  vec_foreach (dev, dcm->dev)
+    vlib_cli_output (vm, "%U", format_crypto, dev);
+  /* *INDENT-ON* */
 
-         s = format (s, "%u\t", thread_index);
+  return NULL;
+}
 
-         /* *INDENT-OFF* */
-         vec_foreach (qpd, cwm->qp_data)
-           {
-             u32 dev_id = qpd->dev_id;
+/*?
+ * This command is used to display the DPDK Crypto device information.
+ *
+ * @cliexpar
+ * Example of how to display the DPDK Crypto device information:
+ * @cliexstart{show dpdk crypto devices}
+ * vpp# show dpdk crypto devices
+ * cryptodev_aesni_mb_pmd   crypto_aesni_mb     down
+ *   numa_node 1, max_queues 8
+ *   free_resources 2, used_resources 2
+ *   SYMMETRIC_CRYPTO, SYM_OPERATION_CHAINING, CPU_AVX2, CPU_AESNI
+ *   Cipher: aes-cbc-128, aes-cbc-192, aes-cbc-256, aes-ctr-128, aes-ctr-192, aes-ctr-256
+ *   Auth: md5-96, sha1-96, sha-256-128, sha-384-192, sha-512-256
+ *
+ * cryptodev_aesni_gcm_pmd  crypto_aesni_gcm    down
+ *   numa_node 1, max_queues 8
+ *   free_resources 2, used_resources 2
+ *   SYMMETRIC_CRYPTO, SYM_OPERATION_CHAINING, CPU_AVX2, CPU_AESNI, MBUF_SCATTER_GATHER
+ *   Cipher: aes-gcm-128, aes-gcm-192, aes-gcm-256
+ *   Auth:
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_dpdk_crypto, static) = {
+    .path = "show dpdk crypto devices",
+    .short_help = "show dpdk crypto devices",
+    .function = show_dpdk_crypto_fn,
+};
 
-             if ((u16) last_cdev != dev_id)
-               {
-                 struct rte_cryptodev_info cdev_info;
+/* *INDENT-ON* */
+static u8 *
+format_crypto_worker (u8 * s, va_list * args)
+{
+  u32 thread_idx = va_arg (*args, u32);
+  u8 verbose = (u8) va_arg (*args, u32);
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_worker_main_t *cwm;
+  crypto_resource_t *res;
+  u16 *res_idx;
+  i8 *pre, *ind;
+  u32 i;
 
-                 rte_cryptodev_info_get (dev_id, &cdev_info);
+  cwm = vec_elt_at_index (dcm->workers_main, thread_idx);
 
-                 s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags &
-                            RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW");
-               }
-             last_cdev = dev_id;
-           }
-         /* *INDENT-ON* */
-         vlib_cli_output (vm, "%s", s);
-       }
-      else
+  s = format (s, "Thread %u (%v):\n", thread_idx,
+             vlib_worker_threads[thread_idx].name);
+
+  /* *INDENT-OFF* */
+  vec_foreach (res_idx, cwm->resource_idx)
+    {
+      ind = "  ";
+      res = vec_elt_at_index (dcm->resource, res_idx[0]);
+      s = format (s, "%s%-20s dev-id %2u inbound-queue %2u outbound-queue %2u\n",
+                 ind, vec_elt_at_index (dcm->dev, res->dev_id)->name,
+                 res->dev_id, res->qp_id, res->qp_id + 1);
+
+      ind = "    ";
+      if (verbose)
        {
-         char cipher_str[15], auth_str[15];
-         struct rte_cryptodev_capabilities cap;
-         crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
-         /* *INDENT-OFF* */
-         hash_foreach (key, data, cwm->algo_qp_map,
-         ({
-           cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
-#if DPDK_NO_AEAD
-           cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER;
-           cap.sym.cipher.algo = p_key->cipher_algo;
-#else
-           if (p_key->is_aead)
+         s = format (s, "%sCipher:", ind);
+         pre = " ";
+         for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+           if (cwm->cipher_resource_idx[i] == res_idx[0])
              {
-               cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AEAD;
-               cap.sym.aead.algo = p_key->cipher_algo;
+               s = format (s, "%s%s", pre, dcm->cipher_algs[i].name);
+               pre = ", ";
              }
-           else
+         s = format (s, "\n");
+
+         s = format (s, "%sAuth:", ind);
+         pre = " ";
+         for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+           if (cwm->auth_resource_idx[i] == res_idx[0])
              {
-               cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER;
-               cap.sym.cipher.algo = p_key->cipher_algo;
+               s = format (s, "%s%s", pre, dcm->auth_algs[i].name);
+               pre = ", ";
              }
-#endif
-           check_algo_is_supported (&cap, cipher_str);
-
-           cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
-           cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH;
-           cap.sym.auth.algo = p_key->auth_algo;
-           check_algo_is_supported (&cap, auth_str);
-
-           vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n",
-                            vlib_mains[i]->thread_index, cipher_str, auth_str,
-                            p_key->is_outbound ? "out" : "in",
-                            cwm->qp_data[data].dev_id,
-                            cwm->qp_data[data].qp_id);
-         }));
-         /* *INDENT-ON* */
+         s = format (s, "\n");
        }
     }
+  /* *INDENT-ON* */
+
+  return s;
 }
 
 static clib_error_t *
-lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input,
-                       vlib_cli_command_t * cmd)
+common_crypto_placement_fn (vlib_main_t * vm, unformat_input_t * input,
+                           vlib_cli_command_t * cmd, u8 verbose)
 {
-  unformat_input_t _line_input, *line_input = &_line_input;
-  u16 detail = 0;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   clib_error_t *error = NULL;
+  u32 i;
+  u8 skip_master;
+
+  if (!dcm->enabled)
+    {
+      vlib_cli_output (vm, "\nDPDK Cryptodev support is disabled\n");
+      return error;
+    }
+
+  skip_master = vlib_num_workers () > 0;
+
+  /* *INDENT-OFF* */
+  vec_foreach_index (i, dcm->workers_main)
+    {
+      if (i < skip_master)
+       continue;
+
+      vlib_cli_output (vm, "%U\n", format_crypto_worker, i, verbose);
+    }
+  /* *INDENT-ON* */
+
+  return error;
+}
+
+static clib_error_t *
+show_dpdk_crypto_placement_fn (vlib_main_t * vm, unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  return common_crypto_placement_fn (vm, input, cmd, 0);
+}
+
+static clib_error_t *
+show_dpdk_crypto_placement_v_fn (vlib_main_t * vm, unformat_input_t * input,
+                                vlib_cli_command_t * cmd)
+{
+  return common_crypto_placement_fn (vm, input, cmd, 1);
+}
+
+/*?
+ * This command is used to display the DPDK Crypto device placement.
+ *
+ * @cliexpar
+ * Example of displaying the DPDK Crypto device placement:
+ * @cliexstart{show dpdk crypto placement}
+ * vpp# show dpdk crypto placement
+ * Thread 1 (vpp_wk_0):
+ *   cryptodev_aesni_mb_p dev-id  0 inbound-queue  0 outbound-queue  1
+ *   cryptodev_aesni_gcm_ dev-id  1 inbound-queue  0 outbound-queue  1
+ *
+ * Thread 2 (vpp_wk_1):
+ *   cryptodev_aesni_mb_p dev-id  0 inbound-queue  2 outbound-queue  3
+ *   cryptodev_aesni_gcm_ dev-id  1 inbound-queue  2 outbound-queue  3
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_dpdk_crypto_placement, static) = {
+    .path = "show dpdk crypto placement",
+    .short_help = "show dpdk crypto placement",
+    .function = show_dpdk_crypto_placement_fn,
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to display the DPDK Crypto device placement
+ * with verbose output.
+ *
+ * @cliexpar
+ * Example of displaying the DPDK Crypto device placement verbose:
+ * @cliexstart{show dpdk crypto placement verbose}
+ * vpp# show dpdk crypto placement verbose
+ * Thread 1 (vpp_wk_0):
+ *   cryptodev_aesni_mb_p dev-id  0 inbound-queue  0 outbound-queue  1
+ *     Cipher: aes-cbc-128, aes-cbc-192, aes-cbc-256, aes-ctr-128, aes-ctr-192, aes-ctr-256
+ *     Auth: md5-96, sha1-96, sha-256-128, sha-384-192, sha-512-256
+ *   cryptodev_aesni_gcm_ dev-id  1 inbound-queue  0 outbound-queue  1
+ *     Cipher: aes-gcm-128, aes-gcm-192, aes-gcm-256
+ *     Auth:
+ *
+ * Thread 2 (vpp_wk_1):
+ *   cryptodev_aesni_mb_p dev-id  0 inbound-queue  2 outbound-queue  3
+ *     Cipher: aes-cbc-128, aes-cbc-192, aes-cbc-256, aes-ctr-128, aes-ctr-192, aes-ctr-256
+ *     Auth: md5-96, sha1-96, sha-256-128, sha-384-192, sha-512-256
+ *   cryptodev_aesni_gcm_ dev-id  1 inbound-queue  2 outbound-queue  3
+ *     Cipher: aes-gcm-128, aes-gcm-192, aes-gcm-256
+ *     Auth:
+ *
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_dpdk_crypto_placement_v, static) = {
+    .path = "show dpdk crypto placement verbose",
+    .short_help = "show dpdk crypto placement verbose",
+    .function = show_dpdk_crypto_placement_v_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_crypto_placement_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_worker_main_t *cwm;
+  crypto_dev_t *dev;
+  u32 thread_idx = (u32) ~ 0, i;
+  u16 res_idx, *idx;
+  u32 dev_idx = (u32) ~ 0;
+  u8 auto_en = 0;
 
   if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
+    return clib_error_return (0, "invalid syntax");
 
   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
-      if (unformat (line_input, "verbose"))
-       detail = 1;
+      if (unformat (line_input, "%u %u", &dev_idx, &thread_idx))
+       ;
+      else if (unformat (line_input, "auto"))
+       auto_en = 1;
       else
        {
-         error = clib_error_return (0, "parse error: '%U'",
-                                    format_unformat_error, line_input);
-         goto done;
+         unformat_free (line_input);
+         return clib_error_return (0, "parse error: '%U'",
+                                   format_unformat_error, line_input);
        }
     }
 
-  dpdk_ipsec_show_mapping (vm, detail);
+  unformat_free (line_input);
+
+  if (auto_en)
+    {
+      crypto_auto_placement ();
+      return 0;
+    }
+
+  /* TODO support device name */
+
+  if (!(dev_idx < vec_len (dcm->dev)))
+    return clib_error_return (0, "please specify valid device index");
+
+  if (thread_idx != (u32) ~ 0 && !(thread_idx < vec_len (dcm->workers_main)))
+    return clib_error_return (0, "invalid thread index");
+
+  dev = vec_elt_at_index (dcm->dev, dev_idx);
+  if (!(vec_len (dev->free_resources)))
+    return clib_error_return (0, "all device resources are being used");
+
+  /* Check thread is not already using the device */
+  /* *INDENT-OFF* */
+  vec_foreach (idx, dev->used_resources)
+    if (dcm->resource[idx[0]].thread_idx == thread_idx)
+      return clib_error_return (0, "thread %u already using device %u",
+                               thread_idx, dev_idx);
+  /* *INDENT-ON* */
+
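+  /* Move one queue-pair resource from the device's free list to its used
+   * list and assign it to the requested worker thread. */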
+  res_idx = vec_pop (dev->free_resources);
+  vec_add1 (dev->used_resources, res_idx);
+
+  cwm = vec_elt_at_index (dcm->workers_main, thread_idx);
+
+  ASSERT (dcm->resource[res_idx].thread_idx == (u16) ~ 0);
+  dcm->resource[res_idx].thread_idx = thread_idx;
+
+  /* Add device to vector of polling resources */
+  vec_add1 (cwm->resource_idx, res_idx);
+
+  /* Set device as default for all supported algos */
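+  /* A resource index of (u16) ~0 means the worker had no resource for the
+   * algorithm; the per-algorithm 'disabled' count tracks how many workers
+   * still lack one. */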
+  for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+    if (dev->cipher_support[i])
+      {
+       if (cwm->cipher_resource_idx[i] == (u16) ~ 0)
+         dcm->cipher_algs[i].disabled--;
+       cwm->cipher_resource_idx[i] = res_idx;
+      }
+
+  for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+    if (dev->auth_support[i])
+      {
+       if (cwm->auth_resource_idx[i] == (u16) ~ 0)
+         dcm->auth_algs[i].disabled--;
+       cwm->auth_resource_idx[i] = res_idx;
+      }
+
+  /* Mark resources that no longer back any algorithm for removal */
+
+  u8 used;
+  /* *INDENT-OFF* */
+  vec_foreach (idx, cwm->resource_idx)
+    {
+      if (idx[0] == res_idx)
+	continue;
+
+      used = 0;
+
+      for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+	used |= cwm->cipher_resource_idx[i] == idx[0];
+
+      for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+	used |= cwm->auth_resource_idx[i] == idx[0];
+
+      vec_elt_at_index (dcm->resource, idx[0])->remove = !used;
+    }
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_dpdk_crypto_placement, static) = {
+    .path = "set dpdk crypto placement",
+    .short_help = "set dpdk crypto placement (<device> <thread> | auto)",
+    .function = set_dpdk_crypto_placement_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * The thread will not enqueue more operations to the device but will poll
+ * from it until there are no more inflight operations.
+*/
+static void
+dpdk_crypto_clear_resource (u16 res_idx)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_resource_t *res = vec_elt_at_index (dcm->resource, res_idx);
+  crypto_worker_main_t *cwm = &dcm->workers_main[res->thread_idx];
+  u32 i;
+
+  for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+    if (cwm->cipher_resource_idx[i] == res_idx)
+      {
+       cwm->cipher_resource_idx[i] = (u16) ~ 0;
+       dcm->cipher_algs[i].disabled++;
+      }
+
+  for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+    if (cwm->auth_resource_idx[i] == res_idx)
+      {
+       cwm->auth_resource_idx[i] = (u16) ~ 0;
+       dcm->auth_algs[i].disabled++;
+      }
+
+  /* Fully remove device on crypto_node once there are no inflights */
+  res->remove = 1;
+}
+
+static clib_error_t *
+clear_dpdk_crypto_placement_fn (vlib_main_t * vm,
+                               unformat_input_t *
+                               input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_dev_t *dev;
+  u32 thread_idx = (u32) ~ 0;
+  u16 *res_idx;
+  u32 dev_idx = (u32) ~ 0;
+  u8 free_all = 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "invalid syntax");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%u %u", &dev_idx, &thread_idx))
+       ;
+      else if (unformat (line_input, "%u", &dev_idx))
+       free_all = 1;
+      else
+       {
+         unformat_free (line_input);
+         return clib_error_return (0, "parse error: '%U'",
+                                   format_unformat_error, line_input);
+       }
+    }
 
-done:
   unformat_free (line_input);
 
-  return error;
+  if (!(dev_idx < vec_len (dcm->dev)))
+    return clib_error_return (0, "invalid device index");
+
+  dev = vec_elt_at_index (dcm->dev, dev_idx);
+
+  /* Clear all resources placements */
+  if (free_all)
+    {
+    /* *INDENT-OFF* */
+    vec_foreach (res_idx, dev->used_resources)
+      dpdk_crypto_clear_resource (res_idx[0]);
+    /* *INDENT-ON* */
+
+      return 0;
+    }
+
+  if (!(thread_idx < vec_len (dcm->workers_main)))
+    return clib_error_return (0, "invalid thread index");
+
+  /* Clear placement of device for given thread index */
+  /* *INDENT-OFF* */
+  vec_foreach (res_idx, dev->used_resources)
+    if (dcm->resource[res_idx[0]].thread_idx == thread_idx)
+      break;
+  /* *INDENT-ON* */
+
+  if (!(res_idx < vec_end (dev->used_resources)))
+    return clib_error_return (0, "thread %u is not using device %u",
+                             thread_idx, dev_idx);
+
+  dpdk_crypto_clear_resource (res_idx[0]);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_dpdk_crypto_placement, static) = {
+    .path = "clear dpdk crypto placement",
+    .short_help = "clear dpdk crypto placement <device> [<thread>]",
+    .function = clear_dpdk_crypto_placement_fn,
+};
+/* *INDENT-ON* */
+
+u8 *
+format_dpdk_mempool (u8 * s, va_list * args)
+{
+  struct rte_mempool *mp = va_arg (*args, struct rte_mempool *);
+  uword indent = format_get_indent (s);
+  u32 count = rte_mempool_avail_count (mp);
+
+  s = format (s, "%s\n%Uavailable %7d, allocated %7d total %7d\n",
+             mp->name, format_white_space, indent + 2,
+             count, mp->size - count, mp->size);
+  s = format (s, "%Uphys_addr %p, flags %08x, nb_mem_chunks %u\n",
+             format_white_space, indent + 2,
+             mp->mz->phys_addr, mp->flags, mp->nb_mem_chunks);
+  s = format (s, "%Uelt_size %4u, header_size %3u, trailer_size %u\n",
+             format_white_space, indent + 2,
+             mp->elt_size, mp->header_size, mp->trailer_size);
+  s = format (s, "%Uprivate_data_size %3u, total_elt_size %u\n",
+             format_white_space, indent + 2,
+             mp->private_data_size,
+             mp->elt_size + mp->header_size + mp->trailer_size);
+  return s;
+}
+
+static clib_error_t *
+show_dpdk_crypto_pools_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_data_t *data;
+
+  /* *INDENT-OFF* */
+  vec_foreach (data, dcm->data)
+  {
+    if (data->crypto_op)
+      vlib_cli_output (vm, "%U\n", format_dpdk_mempool, data->crypto_op);
+#if ! DPDK_NO_AEAD
+    if (data->session_h)
+      vlib_cli_output (vm, "%U\n", format_dpdk_mempool, data->session_h);
+
+    struct rte_mempool **mp;
+    vec_foreach (mp, data->session_drv)
+      if (mp[0])
+       vlib_cli_output (vm, "%U\n", format_dpdk_mempool, mp[0]);
+#endif
+  }
+  /* *INDENT-ON* */
+
+#if DPDK_NO_AEAD
+  crypto_dev_t *dev;
+  /* *INDENT-OFF* */
+  vec_foreach (dev, dcm->dev) if (rte_cryptodevs[dev->id].data->session_pool)
+    vlib_cli_output (vm, "%U\n", format_dpdk_mempool,
+                    rte_cryptodevs[dev->id].data->session_pool);
+  /* *INDENT-ON* */
+#endif
+
+  return NULL;
 }
 
 /*?
- * This command is used to display the DPDK Crypto device data. See
- * @ref dpdk_crypto_ipsec_doc for more details on initializing the
- * DPDK Crypto device.
+ * This command is used to display the DPDK Crypto pools information.
  *
  * @cliexpar
- * Example of displaying the DPDK Crypto device data when disabled:
+ * Example of how to display the DPDK Crypto pools information:
- * @cliexstart{show crypto device mapping}
+ * @cliexstart{show dpdk crypto pools}
- * DPDK Cryptodev support is disabled
- * @cliexend
- * Example of displaying the DPDK Crypto device data when enabled:
- * @cliexstart{show crypto device mapping}
- * worker  crypto device id(type)
- * 1       1(SW)
- * 2       1(SW)
- * @cliexend
- * Example of displaying the DPDK Crypto device data when enabled with verbose:
- * @cliexstart{show crypto device mapping verbose}
- * worker      cipher                 auth dir     dev     qp
- * 1          AES_CTR         AES-XCBC-MAC  in     1       0
- * 1          AES_CTR          HMAC-SHA384  in     1       0
- * 1          AES_CTR          HMAC-SHA384 out     1       1
- * 1          AES_CBC          HMAC-SHA512  in     1       0
- * 1          AES_CBC          HMAC-SHA256  in     1       0
- * 1          AES_CBC         AES-XCBC-MAC out     1       1
- * 1          AES_CTR         AES-XCBC-MAC out     1       1
- * 1          AES_CBC          HMAC-SHA256 out     1       1
- * 1          AES_CTR          HMAC-SHA512 out     1       1
- * 1          AES_CTR          HMAC-SHA256  in     1       0
- * 1          AES_CTR            HMAC-SHA1  in     1       0
- * 1          AES_CBC          HMAC-SHA512 out     1       1
- * 1          AES_CBC          HMAC-SHA384 out     1       1
- * 1          AES_CTR            HMAC-SHA1 out     1       1
- * 1          AES_CTR          HMAC-SHA256 out     1       1
- * 1          AES_CBC            HMAC-SHA1  in     1       0
- * 1          AES_CBC         AES-XCBC-MAC  in     1       0
- * 1          AES_CTR          HMAC-SHA512  in     1       0
- * 1          AES_CBC            HMAC-SHA1 out     1       1
- * 1          AES_CBC          HMAC-SHA384  in     1       0
- * 2          AES_CTR         AES-XCBC-MAC  in     1       2
- * 2          AES_CTR          HMAC-SHA384  in     1       2
- * 2          AES_CTR          HMAC-SHA384 out     1       3
- * 2          AES_CBC          HMAC-SHA512  in     1       2
- * 2          AES_CBC          HMAC-SHA256  in     1       2
- * 2          AES_CBC         AES-XCBC-MAC out     1       3
- * 2          AES_CTR         AES-XCBC-MAC out     1       3
- * 2          AES_CBC          HMAC-SHA256 out     1       3
- * 2          AES_CTR          HMAC-SHA512 out     1       3
- * 2          AES_CTR          HMAC-SHA256  in     1       2
- * 2          AES_CTR            HMAC-SHA1  in     1       2
- * 2          AES_CBC          HMAC-SHA512 out     1       3
- * 2          AES_CBC          HMAC-SHA384 out     1       3
- * 2          AES_CTR            HMAC-SHA1 out     1       3
- * 2          AES_CTR          HMAC-SHA256 out     1       3
- * 2          AES_CBC            HMAC-SHA1  in     1       2
- * 2          AES_CBC         AES-XCBC-MAC  in     1       2
- * 2          AES_CTR          HMAC-SHA512  in     1       2
- * 2          AES_CBC            HMAC-SHA1 out     1       3
- * 2          AES_CBC          HMAC-SHA384  in     1       2
+ * vpp# show dpdk crypto pools
+ * crypto_pool_numa1
+ * available   15872, allocated     512 total   16384
+ * phys_addr 0xf3d2086c0, flags 00000010, nb_mem_chunks 1
+ * elt_size  160, header_size  64, trailer_size 96
+ * private_data_size  64, total_elt_size 320
+ *
+ * session_h_pool_numa1
+ * available   19998, allocated       2 total   20000
+ * phys_addr 0xf3c9c4380, flags 00000010, nb_mem_chunks 1
+ * elt_size   40, header_size  64, trailer_size 88
+ * private_data_size   0, total_elt_size 192
+ *
+ * session_drv0_pool_numa1
+ * available   19998, allocated       2 total   20000
+ * phys_addr 0xf3ad42d80, flags 00000010, nb_mem_chunks 1
+ * elt_size  512, header_size  64, trailer_size 0
+ * private_data_size   0, total_elt_size 576
  * @cliexend
 ?*/
 /* *INDENT-OFF* */
-VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = {
-    .path = "show crypto device mapping",
-    .short_help =
-    "show cryptodev device mapping [verbose]",
-    .function = lcore_cryptodev_map_fn,
+VLIB_CLI_COMMAND (show_dpdk_crypto_pools, static) = {
+    .path = "show dpdk crypto pools",
+    .short_help = "show dpdk crypto pools",
+    .function = show_dpdk_crypto_pools_fn,
 };
 /* *INDENT-ON* */
 
+/* TODO Allow user define number of sessions supported */
+/* TODO Allow user define descriptor queue size */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index a3c4590..edebaf6 100644 (file)
@@ -2,10 +2,10 @@
  *------------------------------------------------------------------
  * crypto_node.c - DPDK Cryptodev input node
  *
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
 #include <dpdk/device/dpdk_priv.h>
 #include <dpdk/ipsec/ipsec.h>
 
-#define foreach_dpdk_crypto_input_next         \
-  _(DROP, "error-drop")                                \
-  _(ENCRYPT_POST, "dpdk-esp-encrypt-post")     \
-  _(DECRYPT_POST, "dpdk-esp-decrypt-post")
-
-typedef enum
-{
-#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f,
-  foreach_dpdk_crypto_input_next
-#undef _
-    DPDK_CRYPTO_INPUT_N_NEXT,
-} dpdk_crypto_input_next_t;
-
 #define foreach_dpdk_crypto_input_error                \
   _(DQ_COPS, "Crypto ops dequeued")            \
-  _(COP_FAILED, "Crypto op failed")
+  _(STATUS, "Crypto operation failed")
 
 typedef enum
 {
@@ -61,13 +48,34 @@ vlib_node_registration_t dpdk_crypto_input_node;
 
 typedef struct
 {
-  u32 cdev;
-  u32 qp;
   u32 status;
-  u32 sa_idx;
-  u32 next_index;
 } dpdk_crypto_input_trace_t;
 
+#define foreach_cryptodev_status \
+    _(SUCCESS, "success") \
+    _(NOT_PROCESSED, "not processed") \
+    _(AUTH_FAILED, "auth failed") \
+    _(INVALID_SESSION, "invalid session") \
+    _(INVALID_ARGS, "invalid arguments") \
+    _(ERROR, "error")
+
+static u8 *
+format_cryptodev_status (u8 * s, va_list * args)
+{
+  u32 status = va_arg (*args, u32);
+  i8 *str = "unknown";
+
+  switch (status)
+    {
+#define _(x, z) case RTE_CRYPTO_OP_STATUS_##x: str = z; break;
+      foreach_cryptodev_status
+#undef _
+    }
+  s = format (s, "%s", str);
+
+  return s;
+}
+
 static u8 *
 format_dpdk_crypto_input_trace (u8 * s, va_list * args)
 {
@@ -75,96 +83,97 @@ format_dpdk_crypto_input_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *);
 
-  s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d",
-             t->cdev, t->qp, t->next_index);
-
-  s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx);
+  s = format (s, "status: %U", format_cryptodev_status, t->status);
 
   return s;
 }
 
 static_always_inline u32
 dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
-                    crypto_qp_data_t * qpd)
+                    crypto_resource_t * res, u8 outbound)
 {
-  u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index;
-  struct rte_crypto_op **cops = qpd->cops;
-
-  if (qpd->inflights == 0)
-    return 0;
-
-  if (qpd->is_outbound)
-    def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST;
-  else
-    def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST;
-
-  n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id,
-                                       cops, VLIB_FRAME_SIZE);
-  n_deq = n_cops;
-  next_index = def_next_index;
+  u32 n_deq, total_n_deq = 0, *to_next = 0, n_ops, next_index;
+  u32 thread_idx = vlib_get_thread_index ();
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  u8 numa = rte_socket_id ();
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_idx);
+  struct rte_crypto_op **ops;
 
-  qpd->inflights -= n_cops;
-  ASSERT (qpd->inflights >= 0);
+  next_index = node->cached_next_index;
 
-  while (n_cops > 0)
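+  /* Dequeue bursts of completed operations; keep draining while full
+   * bursts come back and this queue still has inflight operations. */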
+  do
     {
-      u32 n_left_to_next;
+      ops = cwm->ops;
+      n_ops = rte_cryptodev_dequeue_burst (res->dev_id,
+                                          res->qp_id + outbound,
+                                          ops, VLIB_FRAME_SIZE);
+      res->inflights[outbound] -= n_ops;
+      ASSERT (res->inflights[outbound] >= 0);
 
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      n_deq = n_ops;
+      total_n_deq += n_ops;
 
-      while (n_cops > 0 && n_left_to_next > 0)
+      while (n_ops > 0)
        {
-         u32 bi0, next0;
-         vlib_buffer_t *b0 = 0;
-         struct rte_crypto_op *cop;
-         struct rte_crypto_sym_op *sym_cop;
+         u32 n_left_to_next;
 
-         cop = cops[0];
-         cops += 1;
-         n_cops -= 1;
-         n_left_to_next -= 1;
+         vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-         next0 = def_next_index;
-
-         if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+         while (n_ops > 0 && n_left_to_next > 0)
            {
-             next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
-             vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
-                                          DPDK_CRYPTO_INPUT_ERROR_COP_FAILED,
-                                          1);
+             u32 bi0, next0;
+             vlib_buffer_t *b0 = 0;
+             struct rte_crypto_op *op;
+
+             op = ops[0];
+             ops += 1;
+             n_ops -= 1;
+             n_left_to_next -= 1;
+
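+             /* The enqueue node (esp-encrypt/decrypt) stored the desired
+              * post-crypto next node in the op private data. */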
+             dpdk_op_priv_t *priv = crypto_op_get_priv (op);
+             next0 = priv->next;
+
+             if (PREDICT_FALSE (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+               {
+                 next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
+                 vlib_node_increment_counter (vm,
+                                              dpdk_crypto_input_node.index,
+                                              DPDK_CRYPTO_INPUT_ERROR_STATUS,
+                                              1);
+               }
+
+             /* XXX store bi0 and next0 in op private? */
+
+             b0 = vlib_buffer_from_rte_mbuf (op->sym[0].m_src);
+             bi0 = vlib_get_buffer_index (vm, b0);
+
+             to_next[0] = bi0;
+             to_next += 1;
+
+             if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+               {
+                 vlib_trace_next_frame (vm, node, next0);
+                 dpdk_crypto_input_trace_t *tr =
+                   vlib_add_trace (vm, node, b0, sizeof (*tr));
+                 tr->status = op->status;
+               }
+
+             op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                              n_left_to_next, bi0, next0);
            }
-         cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-
-         sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
-         b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src);
-         bi0 = vlib_get_buffer_index (vm, b0);
-
-         to_next[0] = bi0;
-         to_next += 1;
-
-         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             vlib_trace_next_frame (vm, node, next0);
-             dpdk_crypto_input_trace_t *tr =
-               vlib_add_trace (vm, node, b0, sizeof (*tr));
-             tr->cdev = qpd->dev_id;
-             tr->qp = qpd->qp_id;
-             tr->status = cop->status;
-             tr->next_index = next0;
-             tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index;
-           }
-
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, bi0, next0);
+         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
        }
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
 
-  crypto_free_cop (qpd, qpd->cops, n_deq);
+      crypto_free_ops (numa, cwm->ops, n_deq);
+    }
+  while (n_deq == VLIB_FRAME_SIZE && res->inflights[outbound]);
 
   vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
-                              DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq);
-  return n_deq;
+                              DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, total_n_deq);
+  return total_n_deq;
 }
 
 static uword
@@ -174,14 +183,55 @@ dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
-  crypto_qp_data_t *qpd;
+  crypto_resource_t *res;
   u32 n_deq = 0;
+  u8 outbound;
+  u16 *remove = NULL, *res_idx;
+  word i;
 
   /* *INDENT-OFF* */
-  vec_foreach (qpd, cwm->qp_data)
-      n_deq += dpdk_crypto_dequeue(vm, node, qpd);
+  vec_foreach (res_idx, cwm->resource_idx)
+    {
+      res = vec_elt_at_index (dcm->resource, res_idx[0]);
+
+      outbound = 0;
+      if (res->inflights[outbound])
+       n_deq += dpdk_crypto_dequeue (vm, node, res, outbound);
+
+      outbound = 1;
+      if (res->inflights[outbound])
+       n_deq += dpdk_crypto_dequeue (vm, node, res, outbound);
+
+      if (unlikely(res->remove && !(res->inflights[0] || res->inflights[1])))
+       vec_add1 (remove, res_idx[0]);
+    }
   /* *INDENT-ON* */
 
+  /* TODO removal on master thread? */
+  if (PREDICT_FALSE (remove != NULL))
+    {
+      /* *INDENT-OFF* */
+      vec_foreach (res_idx, remove)
+       {
+         i = vec_search (cwm->resource_idx, res_idx[0]);
+         vec_del1 (cwm->resource_idx, i);
+
+         res = vec_elt_at_index (dcm->resource, res_idx[0]);
+         res->thread_idx = (u16) ~0;
+         res->remove = 0;
+
+         i = vec_search (dcm->dev[res->dev_id].used_resources, res_idx[0]);
+         ASSERT (i != (u16) ~0);
+         vec_del1 (dcm->dev[res->dev_id].used_resources, i);
+         vec_add1 (dcm->dev[res->dev_id].free_resources, res_idx[0]);
+       }
+      /* *INDENT-ON* */
+
+      vec_free (remove);
+    }
+
+  /* TODO Clear all sessions in device */
+
   return n_deq;
 }
 
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
deleted file mode 100644 (file)
index 5b5c81a..0000000
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __DPDK_ESP_H__
-#define __DPDK_ESP_H__
-
-#include <dpdk/ipsec/ipsec.h>
-#include <vnet/ipsec/ipsec.h>
-#include <vnet/ipsec/esp.h>
-
-typedef struct
-{
-  enum rte_crypto_cipher_algorithm algo;
-#if ! DPDK_NO_AEAD
-  enum rte_crypto_aead_algorithm aead_algo;
-#endif
-  u8 key_len;
-  u8 iv_len;
-} dpdk_esp_crypto_alg_t;
-
-typedef struct
-{
-  enum rte_crypto_auth_algorithm algo;
-  u8 trunc_size;
-} dpdk_esp_integ_alg_t;
-
-typedef struct
-{
-  dpdk_esp_crypto_alg_t *esp_crypto_algs;
-  dpdk_esp_integ_alg_t *esp_integ_algs;
-} dpdk_esp_main_t;
-
-dpdk_esp_main_t dpdk_esp_main;
-
-static_always_inline void
-dpdk_esp_init ()
-{
-  dpdk_esp_main_t *em = &dpdk_esp_main;
-  dpdk_esp_integ_alg_t *i;
-  dpdk_esp_crypto_alg_t *c;
-
-  vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1);
-
-  c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128];
-  c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
-  c->key_len = 16;
-  c->iv_len = 16;
-
-  c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192];
-  c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
-  c->key_len = 24;
-  c->iv_len = 16;
-
-  c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256];
-  c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
-  c->key_len = 32;
-  c->iv_len = 16;
-
-  c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128];
-#if DPDK_NO_AEAD
-  c->algo = RTE_CRYPTO_CIPHER_AES_GCM;
-#else
-  c->aead_algo = RTE_CRYPTO_AEAD_AES_GCM;
-#endif
-  c->key_len = 16;
-  c->iv_len = 8;
-
-  vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1);
-
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96];
-  i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
-  i->trunc_size = 12;
-
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96];
-  i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
-  i->trunc_size = 12;
-
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128];
-  i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
-  i->trunc_size = 16;
-
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192];
-  i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
-  i->trunc_size = 24;
-
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256];
-  i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
-  i->trunc_size = 32;
-#if DPDK_NO_AEAD
-  i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128];
-  i->algo = RTE_CRYPTO_AUTH_AES_GCM;
-  i->trunc_size = 16;
-#endif
-}
-
-static_always_inline int
-translate_crypto_algo (ipsec_crypto_alg_t crypto_algo,
-                      struct rte_crypto_sym_xform *xform, u8 use_esn)
-{
-#if ! DPDK_NO_AEAD
-  const u16 iv_off =
-    sizeof (struct rte_crypto_op) + sizeof (struct rte_crypto_sym_op) +
-    offsetof (dpdk_cop_priv_t, cb);
-#endif
-
-  xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
-
-  switch (crypto_algo)
-    {
-    case IPSEC_CRYPTO_ALG_NONE:
-#if ! DPDK_NO_AEAD
-      xform->cipher.iv.offset = iv_off;
-      xform->cipher.iv.length = 0;
-#endif
-      xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL;
-      break;
-    case IPSEC_CRYPTO_ALG_AES_CBC_128:
-    case IPSEC_CRYPTO_ALG_AES_CBC_192:
-    case IPSEC_CRYPTO_ALG_AES_CBC_256:
-#if ! DPDK_NO_AEAD
-      xform->cipher.iv.offset = iv_off;
-      xform->cipher.iv.length = 16;
-#endif
-      xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
-      break;
-    case IPSEC_CRYPTO_ALG_AES_GCM_128:
-#if DPDK_NO_AEAD
-      xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM;
-#else
-      xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
-      xform->aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
-      xform->aead.iv.offset = iv_off;
-      xform->aead.iv.length = 12;      /* GCM IV, not ESP IV */
-      xform->aead.digest_length = 16;
-      xform->aead.aad_length = use_esn ? 12 : 8;
-#endif
-      break;
-    default:
-      return -1;
-    }
-
-  return 0;
-}
-
-static_always_inline int
-translate_integ_algo (ipsec_integ_alg_t integ_alg,
-                     struct rte_crypto_sym_xform *auth_xform, u8 use_esn)
-{
-  auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH;
-
-  switch (integ_alg)
-    {
-    case IPSEC_INTEG_ALG_NONE:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL;
-      auth_xform->auth.digest_length = 0;
-      break;
-    case IPSEC_INTEG_ALG_SHA1_96:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
-      auth_xform->auth.digest_length = 12;
-      break;
-    case IPSEC_INTEG_ALG_SHA_256_96:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
-      auth_xform->auth.digest_length = 12;
-      break;
-    case IPSEC_INTEG_ALG_SHA_256_128:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
-      auth_xform->auth.digest_length = 16;
-      break;
-    case IPSEC_INTEG_ALG_SHA_384_192:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
-      auth_xform->auth.digest_length = 24;
-      break;
-    case IPSEC_INTEG_ALG_SHA_512_256:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
-      auth_xform->auth.digest_length = 32;
-      break;
-#if DPDK_NO_AEAD
-    case IPSEC_INTEG_ALG_AES_GCM_128:
-      auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM;
-      auth_xform->auth.digest_length = 16;
-      auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8;
-      break;
-#endif
-    default:
-      return -1;
-    }
-
-  return 0;
-}
-
-static_always_inline i32
-create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess,
-                u8 is_outbound)
-{
-  u32 thread_index = vlib_get_thread_index ();
-  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
-  struct rte_crypto_sym_xform cipher_xform = { 0 };
-  struct rte_crypto_sym_xform auth_xform = { 0 };
-  struct rte_crypto_sym_xform *xfs;
-  uword key = 0, *data;
-  crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
-#if ! DPDK_NO_AEAD
-  i32 socket_id = rte_socket_id ();
-  i32 ret;
-#endif
-
-  if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-    {
-      sa->crypto_key_len -= 4;
-      clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4);
-    }
-  else
-    {
-      u32 seed = (u32) clib_cpu_time_now ();
-      sa->salt = random_u32 (&seed);
-    }
-
-  if (translate_crypto_algo (sa->crypto_alg, &cipher_xform, sa->use_esn) < 0)
-    return -1;
-  p_key->cipher_algo = cipher_xform.cipher.algo;
-
-  if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0)
-    return -1;
-  p_key->auth_algo = auth_xform.auth.algo;
-
-#if ! DPDK_NO_AEAD
-  if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-    {
-      cipher_xform.aead.key.data = sa->crypto_key;
-      cipher_xform.aead.key.length = sa->crypto_key_len;
-
-      if (is_outbound)
-       cipher_xform.cipher.op =
-         (enum rte_crypto_cipher_operation) RTE_CRYPTO_AEAD_OP_ENCRYPT;
-      else
-       cipher_xform.cipher.op =
-         (enum rte_crypto_cipher_operation) RTE_CRYPTO_AEAD_OP_DECRYPT;
-      cipher_xform.next = NULL;
-      xfs = &cipher_xform;
-      p_key->is_aead = 1;
-    }
-  else                         /* Cipher + Auth */
-#endif
-    {
-      cipher_xform.cipher.key.data = sa->crypto_key;
-      cipher_xform.cipher.key.length = sa->crypto_key_len;
-
-      auth_xform.auth.key.data = sa->integ_key;
-      auth_xform.auth.key.length = sa->integ_key_len;
-
-      if (is_outbound)
-       {
-         cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
-         auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
-         cipher_xform.next = &auth_xform;
-         xfs = &cipher_xform;
-       }
-      else
-       {
-         cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
-         auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY;
-         auth_xform.next = &cipher_xform;
-         xfs = &auth_xform;
-       }
-      p_key->is_aead = 0;
-    }
-
-  p_key->is_outbound = is_outbound;
-
-  data = hash_get (cwm->algo_qp_map, key);
-  if (!data)
-    return -1;
-
-#if DPDK_NO_AEAD
-  sa_sess->sess =
-    rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs);
-  if (!sa_sess->sess)
-    return -1;
-#else
-  sa_sess->sess =
-    rte_cryptodev_sym_session_create (dcm->sess_h_pools[socket_id]);
-  if (!sa_sess->sess)
-    return -1;
-
-  ret =
-    rte_cryptodev_sym_session_init (cwm->qp_data[*data].dev_id, sa_sess->sess,
-                                   xfs, dcm->sess_pools[socket_id]);
-  if (ret)
-    return -1;
-#endif
-
-  sa_sess->qp_index = (u8) * data;
-
-  return 0;
-}
-
-static_always_inline void
-crypto_set_icb (dpdk_gcm_cnt_blk * icb, u32 salt, u32 seq, u32 seq_hi)
-{
-  icb->salt = salt;
-  icb->iv[0] = seq;
-  icb->iv[1] = seq_hi;
-#if DPDK_NO_AEAD
-  icb->cnt = clib_host_to_net_u32 (1);
-#endif
-}
-
-#define __unused __attribute__((unused))
-static_always_inline void
-crypto_op_setup (u8 is_aead, struct rte_mbuf *mb0,
-                struct rte_crypto_op *cop, void *session,
-                u32 cipher_off, u32 cipher_len,
-                u8 * icb __unused, u32 iv_size __unused,
-                u32 auth_off, u32 auth_len,
-                u8 * aad __unused, u32 aad_size __unused,
-                u8 * digest, u64 digest_paddr, u32 digest_size __unused)
-{
-  struct rte_crypto_sym_op *sym_cop;
-
-  sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
-
-  sym_cop->m_src = mb0;
-  rte_crypto_op_attach_sym_session (cop, session);
-
-  if (!digest_paddr)
-    digest_paddr =
-      rte_pktmbuf_mtophys_offset (mb0, (uintptr_t) digest - (uintptr_t) mb0);
-
-#if DPDK_NO_AEAD
-  sym_cop->cipher.data.offset = cipher_off;
-  sym_cop->cipher.data.length = cipher_len;
-
-  sym_cop->cipher.iv.data = icb;
-  sym_cop->cipher.iv.phys_addr =
-    cop->phys_addr + (uintptr_t) icb - (uintptr_t) cop;
-  sym_cop->cipher.iv.length = iv_size;
-
-  if (is_aead)
-    {
-      sym_cop->auth.aad.data = aad;
-      sym_cop->auth.aad.phys_addr =
-       cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop;
-      sym_cop->auth.aad.length = aad_size;
-    }
-  else
-    {
-      sym_cop->auth.data.offset = auth_off;
-      sym_cop->auth.data.length = auth_len;
-    }
-
-  sym_cop->auth.digest.data = digest;
-  sym_cop->auth.digest.phys_addr = digest_paddr;
-  sym_cop->auth.digest.length = digest_size;
-#else /* ! DPDK_NO_AEAD */
-  if (is_aead)
-    {
-      sym_cop->aead.data.offset = cipher_off;
-      sym_cop->aead.data.length = cipher_len;
-
-      sym_cop->aead.aad.data = aad;
-      sym_cop->aead.aad.phys_addr =
-       cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop;
-
-      sym_cop->aead.digest.data = digest;
-      sym_cop->aead.digest.phys_addr = digest_paddr;
-    }
-  else
-    {
-      sym_cop->cipher.data.offset = cipher_off;
-      sym_cop->cipher.data.length = cipher_len;
-
-      sym_cop->auth.data.offset = auth_off;
-      sym_cop->auth.data.length = auth_len;
-
-      sym_cop->auth.digest.data = digest;
-      sym_cop->auth.digest.phys_addr = digest_paddr;
-    }
-#endif /* DPDK_NO_AEAD */
-}
-
-#undef __unused
-
-#endif /* __DPDK_ESP_H__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
index c4f295d..90be466 100644 (file)
@@ -1,10 +1,10 @@
 /*
  * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev
  *
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
 #include <vnet/ip/ip.h>
 
 #include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
 #include <dpdk/ipsec/ipsec.h>
-#include <dpdk/ipsec/esp.h>
 #include <dpdk/device/dpdk.h>
 #include <dpdk/device/dpdk_priv.h>
 
 #define foreach_esp_decrypt_next              \
 _(DROP, "error-drop")                         \
-_(IP4_INPUT, "ip4-input")                     \
+_(IP4_INPUT, "ip4-input-no-checksum")         \
 _(IP6_INPUT, "ip6-input")
 
 #define _(v, s) ESP_DECRYPT_NEXT_##v,
@@ -43,8 +43,10 @@ typedef enum {
  _(REPLAY, "SA replayed packet")                \
  _(NOT_IP, "Not IP packet (dropped)")           \
  _(ENQ_FAIL, "Enqueue failed (buffer full)")     \
- _(NO_CRYPTODEV, "Cryptodev not configured")     \
- _(BAD_LEN, "Invalid ciphertext length")
+ _(DISCARD, "Not enough crypto operations, discarding frame")  \
+ _(BAD_LEN, "Invalid ciphertext length")         \
+ _(SESSION, "Failed to get crypto session")      \
+ _(NOSUP, "Cipher/Auth not supported")
 
 
 typedef enum {
@@ -65,6 +67,7 @@ vlib_node_registration_t dpdk_esp_decrypt_node;
 typedef struct {
   ipsec_crypto_alg_t crypto_alg;
   ipsec_integ_alg_t integ_alg;
+  u8 packet_data[64];
 } esp_decrypt_trace_t;
 
 /* packet trace format function */
@@ -73,10 +76,14 @@ static u8 * format_esp_decrypt_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *);
+  uword indent = format_get_indent (s);
 
-  s = format (s, "esp: crypto %U integrity %U",
+  s = format (s, "cipher %U auth %U\n",
              format_ipsec_crypto_alg, t->crypto_alg,
              format_ipsec_integ_alg, t->integ_alg);
+  s = format (s, "%U%U",
+             format_white_space, indent,
+             format_esp_header, t->packet_data);
   return s;
 }
 
@@ -87,30 +94,31 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 thread_index = vlib_get_thread_index();
-  dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
-  dpdk_esp_main_t * em = &dpdk_esp_main;
-  u32 i;
+  u32 thread_idx = vlib_get_thread_index();
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_resource_t *res = 0;
+  ipsec_sa_t *sa0 = 0;
+  crypto_alg_t *cipher_alg = 0, *auth_alg = 0;
+  struct rte_cryptodev_sym_session *session = 0;
+  u32 ret, last_sa_index = ~0;
+  u8 numa = rte_socket_id ();
+  u8 is_aead = 0;
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_idx);
+  struct rte_crypto_op **ops = cwm->ops;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
 
-  crypto_worker_main_t *cwm =
-    vec_elt_at_index(dcm->workers_main, thread_index);
-  u32 n_qps = vec_len(cwm->qp_data);
-  struct rte_crypto_op ** cops_to_enq[n_qps];
-  u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
-
-  for (i = 0; i < n_qps; i++)
+  ret = crypto_alloc_ops (numa, ops, n_left_from);
+  if (ret)
     {
-      bi_to_enq[i] = cwm->qp_data[i].bi;
-      cops_to_enq[i] = cwm->qp_data[i].cops;
+      vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                  ESP_DECRYPT_ERROR_DISCARD, 1);
+      /* Discard whole frame */
+      return n_left_from;
     }
 
-  memset(n_cop_qp, 0, n_qps * sizeof(u32));
-
-  crypto_alloc_cops();
-
   next_index = ESP_DECRYPT_NEXT_DROP;
 
   while (n_left_from > 0)
@@ -121,44 +129,79 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {
-         u32 bi0, sa_index0 = ~0, seq, trunc_size, iv_size;
-         vlib_buffer_t * b0;
-         esp_header_t * esp0;
-         ipsec_sa_t * sa0;
-         struct rte_mbuf * mb0 = 0;
-         const int BLOCK_SIZE = 16;
-         crypto_sa_session_t * sa_sess;
-         void * sess;
-         u16 qp_index;
-         struct rte_crypto_op * cop = 0;
+         clib_error_t *error;
+         u32 bi0, sa_index0, seq, iv_size;
+         u8 trunc_size;
+         vlib_buffer_t *b0;
+         esp_header_t *esp0;
+         struct rte_mbuf *mb0;
+         struct rte_crypto_op *op;
+         u16 res_idx;
 
          bi0 = from[0];
          from += 1;
          n_left_from -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         mb0 = rte_mbuf_from_vlib_buffer(b0);
          esp0 = vlib_buffer_get_current (b0);
 
-         sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
-         sa0 = pool_elt_at_index (im->sad, sa_index0);
+         /* ih0/ih6_0 */
+         CLIB_PREFETCH (esp0, sizeof (esp0[0]) + 16, LOAD);
+         /* mb0 */
+         CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);
 
-         seq = clib_host_to_net_u32(esp0->seq);
+         op = ops[0];
+         ops += 1;
+         ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
 
-         /* anti-replay check */
-         if (sa0->use_anti_replay)
+         dpdk_op_priv_t *priv = crypto_op_get_priv (op);
+
+         u16 op_len =
+           sizeof (op[0]) + sizeof (op[0].sym[0]) + sizeof (priv[0]);
+         CLIB_PREFETCH (op, op_len, STORE);
+
+         sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
+
+         if (sa_index0 != last_sa_index)
            {
-             int rv = 0;
+             last_sa_index = sa_index0;
 
-             if (PREDICT_TRUE(sa0->use_esn))
-               rv = esp_replay_check_esn(sa0, seq);
-             else
-               rv = esp_replay_check(sa0, seq);
+             sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+             cipher_alg = vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
+             auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);
+
+#if DPDK_NO_AEAD
+             is_aead = (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128 ||
+                           sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192 ||
+                           sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256);
+#else
+             is_aead = (cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD);
+#endif
+             if (is_aead)
+               auth_alg = cipher_alg;
 
-             if (PREDICT_FALSE(rv))
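+             /* Pick a crypto resource (device queue pair) assigned to this
+              * worker that supports the SA's cipher/auth algorithms. */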
+             res_idx = get_resource (cwm, sa0);
+
+             if (PREDICT_FALSE (res_idx == (u16) ~0))
                {
-                 clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq);
+                 clib_warning ("unsupported SA by thread index %u", thread_idx);
                  vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                              ESP_DECRYPT_ERROR_REPLAY, 1);
+                                              ESP_DECRYPT_ERROR_NOSUP, 1);
+                 to_next[0] = bi0;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 goto trace;
+               }
+             res = vec_elt_at_index (dcm->resource, res_idx);
+
+             error = crypto_get_session (&session, sa_index0, res, cwm, 0);
+             if (PREDICT_FALSE (error || !session))
+               {
+                 clib_warning ("failed to get crypto session");
+                 vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                              ESP_DECRYPT_ERROR_SESSION, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
@@ -166,16 +209,23 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
                }
            }
 
-         sa0->total_data_size += b0->current_length;
+         /* anti-replay check */
+         if (sa0->use_anti_replay)
+           {
+             int rv = 0;
 
-         sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0);
+             seq = clib_net_to_host_u32 (esp0->seq);
 
-         if (PREDICT_FALSE(!sa_sess->sess))
-           {
-             int ret = create_sym_sess(sa0, sa_sess, 0);
+             if (PREDICT_TRUE(sa0->use_esn))
+               rv = esp_replay_check_esn (sa0, seq);
+             else
+               rv = esp_replay_check (sa0, seq);
 
-             if (PREDICT_FALSE (ret))
+             if (PREDICT_FALSE (rv))
                {
+                 clib_warning ("failed anti-replay check");
+                 vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                              ESP_DECRYPT_ERROR_REPLAY, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
@@ -183,112 +233,101 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
                }
            }
 
-         sess = sa_sess->sess;
-         qp_index = sa_sess->qp_index;
-
-         ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
-         cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
-         ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
-
-         cops_to_enq[qp_index][0] = cop;
-         cops_to_enq[qp_index] += 1;
-         n_cop_qp[qp_index] += 1;
-         bi_to_enq[qp_index][0] = bi0;
-         bi_to_enq[qp_index] += 1;
+         priv->next = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST;
 
-         rte_crypto_op_attach_sym_session(cop, sess);
+         /* FIXME multi-seg */
+         sa0->total_data_size += b0->current_length;
 
-         if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-           trunc_size = 16;
-         else
-           trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
-         iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+         res->ops[res->n_ops] = op;
+         res->bi[res->n_ops] = bi0;
+         res->n_ops += 1;
 
          /* Convert vlib buffer to mbuf */
-         mb0 = rte_mbuf_from_vlib_buffer(b0);
          mb0->data_len = b0->current_length;
          mb0->pkt_len = b0->current_length;
          mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
 
+         trunc_size = auth_alg->trunc_size;
+         iv_size = cipher_alg->iv_len;
+
          /* Outer IP header has already been stripped */
-         u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) -
-             iv_size - trunc_size;
+         u16 payload_len =
+           b0->current_length - sizeof (esp_header_t) - iv_size - trunc_size;
 
-         if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0))
+         ASSERT (payload_len >= 4);
+
+         if (payload_len & (cipher_alg->boundary - 1))
            {
              clib_warning ("payload %u not multiple of %d\n",
-                           payload_len, BLOCK_SIZE);
+                           payload_len, cipher_alg->boundary);
              vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
                                           ESP_DECRYPT_ERROR_BAD_LEN, 1);
-             vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1);
-             bi_to_enq[qp_index] -= 1;
-             cops_to_enq[qp_index] -= 1;
-             n_cop_qp[qp_index] -= 1;
+             res->n_ops -= 1;
              to_next[0] = bi0;
              to_next += 1;
              n_left_to_next -= 1;
              goto trace;
            }
 
-         struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
-
-         u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128;
          u32 cipher_off, cipher_len;
-         u32 auth_off = 0, auth_len = 0, aad_size = 0;
+         u32 auth_len = 0, aad_size = 0;
          u8 *aad = NULL, *digest = NULL;
          u64 digest_paddr = 0;
 
-          u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t));
-          dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *)(sym_cop + 1);
+          u8 *iv = (u8 *) (esp0 + 1);
+
           dpdk_gcm_cnt_blk *icb = &priv->cb;
 
          cipher_off = sizeof (esp_header_t) + iv_size;
          cipher_len = payload_len;
 
-          digest =
-           vlib_buffer_get_current (b0) + sizeof(esp_header_t) +
-           iv_size + payload_len;
+          digest = vlib_buffer_get_tail (b0) - trunc_size;
 
-          if (is_aead)
-            {
+         if (cipher_alg->alg == RTE_CRYPTO_CIPHER_AES_CBC)
+           clib_memcpy(icb, iv, 16);
+         else /* CTR/GCM */
+           {
              u32 *_iv = (u32 *) iv;
 
              crypto_set_icb (icb, sa0->salt, _iv[0], _iv[1]);
+#if DPDK_NO_AEAD
              iv_size = 16;
+#else
+             iv_size = 12;
+#endif
+           }
 
+          if (is_aead)
+            {
               aad = priv->aad;
               clib_memcpy(aad, esp0, 8);
-             aad_size = 8;
-              if (sa0->use_esn)
+              if (PREDICT_FALSE (sa0->use_esn))
                {
                  *((u32*)&aad[8]) = sa0->seq_hi;
                  aad_size = 12;
                }
+             else
+               aad_size = 8;
             }
           else
             {
-             clib_memcpy(icb, iv, 16);
-
-             auth_off = 0;
              auth_len = sizeof(esp_header_t) + iv_size + payload_len;
 
               if (sa0->use_esn)
                 {
-                  dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1);
-
                   clib_memcpy (priv->icv, digest, trunc_size);
                   *((u32*) digest) = sa0->seq_hi;
                  auth_len += sizeof(sa0->seq_hi);
 
                   digest = priv->icv;
                  digest_paddr =
-                   cop->phys_addr + (uintptr_t) priv->icv - (uintptr_t) cop;
+                   op->phys_addr + (uintptr_t) priv->icv - (uintptr_t) op;
                 }
             }
 
-         crypto_op_setup (is_aead, mb0, cop, sess,
+         crypto_op_setup (is_aead, mb0, op, session,
                           cipher_off, cipher_len, (u8 *) icb, iv_size,
-                          auth_off, auth_len, aad, aad_size,
+                          0, auth_len, aad, aad_size,
                           digest, digest_paddr, trunc_size);
 trace:
          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -296,38 +335,21 @@ trace:
              esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
+             clib_memcpy (tr->packet_data, vlib_buffer_get_current (b0),
+                          sizeof (esp_header_t));
            }
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
+
   vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
                               ESP_DECRYPT_ERROR_RX_PKTS,
                               from_frame->n_vectors);
-  crypto_qp_data_t *qpd;
-  /* *INDENT-OFF* */
-  vec_foreach_index (i, cwm->qp_data)
-    {
-      u32 enq;
-
-      if (!n_cop_qp[i])
-       continue;
 
-      qpd = vec_elt_at_index(cwm->qp_data, i);
-      enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
-                                       qpd->cops, n_cop_qp[i]);
-      qpd->inflights += enq;
+  crypto_enqueue_ops (vm, cwm, 0, dpdk_esp_decrypt_node.index,
+                     ESP_DECRYPT_ERROR_ENQ_FAIL, numa);
 
-      if (PREDICT_FALSE(enq < n_cop_qp[i]))
-       {
-         crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
-         vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
-
-         vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                      ESP_DECRYPT_ERROR_ENQ_FAIL,
-                                      n_cop_qp[i] - enq);
-       }
-    }
-  /* *INDENT-ON* */
+  crypto_free_ops (numa, ops, cwm->ops + from_frame->n_vectors - ops);
 
   return from_frame->n_vectors;
 }
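
The per-frame pattern above replaces the old per-queue-pair bookkeeping: a burst of crypto ops is taken from a per-NUMA pool up front, each packet parks its op on the crypto resource selected for its SA, and whatever was never parked is returned to the pool. A minimal sketch of that lifecycle, reusing the helper names from this patch but with an invented wrapper function and simplified arguments (illustrative only, not the exact node code):

    /* Sketch only: helper semantics assumed from the node above. */
    static uword
    esp_decrypt_sketch (vlib_main_t * vm, crypto_worker_main_t * cwm,
                        vlib_frame_t * frame, u32 node_index, u8 numa)
    {
      struct rte_crypto_op **ops = cwm->ops;
      u32 n = frame->n_vectors;

      if (crypto_alloc_ops (numa, ops, n))  /* all-or-nothing allocation */
        return n;                           /* whole frame is dropped */

      /* Per-packet loop: take ops[0], advance ops, and park the op on the
       * resource so it is enqueued in one burst per device/queue-pair:
       *   res->ops[res->n_ops] = op; res->bi[res->n_ops] = bi0;
       *   res->n_ops += 1;
       */

      crypto_enqueue_ops (vm, cwm, 0 /* inbound */, node_index,
                          ESP_DECRYPT_ERROR_ENQ_FAIL, numa);
      /* ops never parked on a resource (early drops) go back to the pool */
      crypto_free_ops (numa, ops, cwm->ops + n - ops);
      return n;
    }
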
@@ -378,6 +400,21 @@ vlib_node_registration_t dpdk_esp_decrypt_post_node;
 
 static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args)
 {
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *);
+  uword indent = format_get_indent (s);
+
+  s = format (s, "cipher %U auth %U\n",
+             format_ipsec_crypto_alg, t->crypto_alg,
+             format_ipsec_integ_alg, t->integ_alg);
+
+  ip4_header_t *ih4 = (ip4_header_t *) t->packet_data;
+  if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
+    s = format (s, "%U%U", format_white_space, indent, format_ip6_header, ih4);
+  else
+    s = format (s, "%U%U", format_white_space, indent, format_ip4_header, ih4);
+
   return s;
 }
 
@@ -390,7 +427,7 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
   ipsec_sa_t * sa0;
   u32 sa_index0 = ~0;
   ipsec_main_t *im = &ipsec_main;
-  dpdk_esp_main_t *em = &dpdk_esp_main;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -406,12 +443,13 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
       while (n_left_from > 0 && n_left_to_next > 0)
        {
          esp_footer_t * f0;
-         u32 bi0, next0, trunc_size, iv_size;
+         u32 bi0, iv_size, next0;
          vlib_buffer_t * b0 = 0;
          ip4_header_t *ih4 = 0, *oh4 = 0;
          ip6_header_t *ih6 = 0, *oh6 = 0;
-         u8 tunnel_mode = 1;
-         u8 transport_ip6 = 0;
+         crypto_alg_t *cipher_alg, *auth_alg;
+         esp_header_t *esp0;
+         u8 trunc_size, is_aead;
 
          next0 = ESP_DECRYPT_NEXT_DROP;
 
@@ -421,6 +459,7 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
          n_left_to_next -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         esp0 = vlib_buffer_get_current (b0);
 
          sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
          sa0 = pool_elt_at_index (im->sad, sa_index0);
@@ -428,15 +467,24 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
          to_next[0] = bi0;
          to_next += 1;
 
-         if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-           trunc_size = 16;
-         else
-           trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
-         iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
+         cipher_alg = vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
+         auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);
+#if DPDK_NO_AEAD
+         is_aead = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) ||
+                    (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) ||
+                    (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256));
+#else
+         is_aead = cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD;
+#endif
+         if (is_aead)
+           auth_alg = cipher_alg;
+
+         trunc_size = auth_alg->trunc_size;
+
+         iv_size = cipher_alg->iv_len;
 
          if (sa0->use_anti_replay)
            {
-             esp_header_t * esp0 = vlib_buffer_get_current (b0);
              u32 seq;
              seq = clib_host_to_net_u32(esp0->seq);
              if (PREDICT_TRUE(sa0->use_esn))
@@ -445,39 +493,30 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
                esp_replay_advance(sa0, seq);
            }
 
+         /* FIXME ip header */
          ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t));
          vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size);
 
-         b0->current_length -= (trunc_size + 2);
          b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-         f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) +
-                                b0->current_length);
-         b0->current_length -= f0->pad_length;
-
-         /* transport mode */
-         if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6))
+         f0 = (esp_footer_t *) (vlib_buffer_get_tail (b0) - trunc_size - 2);
+         b0->current_length -= (f0->pad_length + trunc_size + 2);
+#if 0
+         /* check padding */
+         const u8 *padding = vlib_buffer_get_tail (b0);
+         if (PREDICT_FALSE (memcmp (padding, pad_data, f0->pad_length)))
            {
-             tunnel_mode = 0;
-
-             if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40))
-               {
-                 if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60))
-                   transport_ip6 = 1;
-                 else
-                   {
-                     clib_warning("next header: 0x%x", f0->next_header);
-                     vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
-                                                  ESP_DECRYPT_ERROR_NOT_IP, 1);
-                     goto trace;
-                   }
-               }
+             clib_warning("bad padding");
+             vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                          ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                          1);
+             goto trace;
            }
-
-         if (PREDICT_TRUE (tunnel_mode))
+#endif
+         if (sa0->is_tunnel)
            {
-             if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP))
+             if (f0->next_header == IP_PROTOCOL_IP_IN_IP)
                next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
-             else if (f0->next_header == IP_PROTOCOL_IPV6)
+             else if (sa0->is_tunnel_ip6 && f0->next_header == IP_PROTOCOL_IPV6)
                next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
              else
                {
@@ -488,11 +527,31 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
                  goto trace;
                }
            }
-         /* transport mode */
-         else
+         else /* transport mode */
            {
-             if (PREDICT_FALSE(transport_ip6))
+             if ((ih4->ip_version_and_header_length & 0xF0) == 0x40)
                {
+                 u16 ih4_len = ip4_header_bytes (ih4);
+                 vlib_buffer_advance (b0, - ih4_len);
+                 oh4 = vlib_buffer_get_current (b0);
+                 memmove(oh4, ih4, ih4_len);
+
+                 next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
+                 u16 old_ttl_prot =
+                   ((u16) oh4->ttl) << 8 | (u16) oh4->protocol;
+                 u16 new_ttl_prot =
+                   ((u16) oh4->ttl) << 8 | (u16) f0->next_header;
+                 oh4->protocol = f0->next_header;
+                 u16 old_len = oh4->length;
+                 u16 new_len = clib_host_to_net_u16 (b0->current_length);
+                 oh4->length = new_len;
+                 /* rfc1624 incremental checksum update */
+                 oh4->checksum = ~(~oh4->checksum + ~old_len + new_len +
+                                   ~old_ttl_prot + new_ttl_prot);
+               }
+             else if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
+               {
+                 /* FIXME find ip header */
                  ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t));
                  vlib_buffer_advance (b0, -sizeof(ip6_header_t));
                  oh6 = vlib_buffer_get_current (b0);
@@ -500,36 +559,29 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
 
                  next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
                  oh6->protocol = f0->next_header;
-                 oh6->payload_length =
-                     clib_host_to_net_u16 (
-                         vlib_buffer_length_in_chain(vm, b0) -
-                         sizeof (ip6_header_t));
+                 u16 len = b0->current_length - sizeof (ip6_header_t);
+                 oh6->payload_length = clib_host_to_net_u16 (len);
                }
              else
                {
-                 vlib_buffer_advance (b0, -sizeof(ip4_header_t));
-                 oh4 = vlib_buffer_get_current (b0);
-                 memmove(oh4, ih4, sizeof(ip4_header_t));
-
-                 next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
-                 oh4->ip_version_and_header_length = 0x45;
-                 oh4->fragment_id = 0;
-                 oh4->flags_and_fragment_offset = 0;
-                 oh4->protocol = f0->next_header;
-                 oh4->length = clib_host_to_net_u16 (
-                     vlib_buffer_length_in_chain (vm, b0));
-                 oh4->checksum = ip4_header_checksum (oh4);
+                 clib_warning("next header: 0x%x", f0->next_header);
+                 vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
+                                              ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
+                                              1);
+                 goto trace;
                }
            }
 
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;
 
-trace:
+       trace:
          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
+             ih4 = vlib_buffer_get_current (b0);
+             clib_memcpy (tr->packet_data, ih4, sizeof (ip6_header_t));
            }
 
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
@@ -537,6 +589,7 @@ trace:
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
+
   vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index,
                               ESP_DECRYPT_POST_ERROR_PKTS,
                               from_frame->n_vectors);
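
The transport-mode IPv4 branch above avoids recomputing the header checksum from scratch: only two 16-bit words change (the total length and the ttl/protocol pair), so it applies the incremental update from RFC 1624, HC' = ~(~HC + ~m + m'). A standalone sketch of the same update with explicit end-around carry folding (the node leans on integer wrap-around instead):

    /* RFC 1624 (eq. 3): HC' = ~(~HC + ~m + m'), one's-complement arithmetic.
     * m/m' are the old/new values of each 16-bit header word that changed. */
    static u16
    ip4_csum_update_sketch (u16 hc, u16 old_len, u16 new_len,
                            u16 old_ttl_prot, u16 new_ttl_prot)
    {
      u32 sum = (u16) ~hc;
      sum += (u16) ~old_len + new_len;
      sum += (u16) ~old_ttl_prot + new_ttl_prot;
      while (sum >> 16)             /* fold carries back into the low word */
        sum = (sum & 0xffff) + (sum >> 16);
      return (u16) ~sum;
    }
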
index 6de444f..3ce2284 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev
  *
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
@@ -20,8 +20,8 @@
 #include <vnet/ip/ip.h>
 
 #include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/esp.h>
 #include <dpdk/ipsec/ipsec.h>
-#include <dpdk/ipsec/esp.h>
 #include <dpdk/device/dpdk.h>
 #include <dpdk/device/dpdk_priv.h>
 
@@ -41,9 +41,11 @@ typedef enum
 
 #define foreach_esp_encrypt_error                   \
  _(RX_PKTS, "ESP pkts received")                    \
- _(SEQ_CYCLED, "sequence number cycled")            \
- _(ENQ_FAIL, "Enqueue failed (buffer full)")        \
- _(NO_CRYPTODEV, "Cryptodev not configured")
+ _(SEQ_CYCLED, "Sequence number cycled")            \
+ _(ENQ_FAIL, "Enqueue failed to crypto device")     \
+ _(DISCARD, "Not enough crypto operations, discarding frame")  \
+ _(SESSION, "Failed to get crypto session")         \
+ _(NOSUP, "Cipher/Auth not supported")
 
 
 typedef enum
@@ -64,10 +66,9 @@ vlib_node_registration_t dpdk_esp_encrypt_node;
 
 typedef struct
 {
-  u32 spi;
-  u32 seq;
   ipsec_crypto_alg_t crypto_alg;
   ipsec_integ_alg_t integ_alg;
+  u8 packet_data[64];
 } esp_encrypt_trace_t;
 
 /* packet trace format function */
@@ -77,11 +78,29 @@ format_esp_encrypt_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
+  ip4_header_t *ih4 = (ip4_header_t *) t->packet_data;
+  uword indent = format_get_indent (s), offset;
 
-  s = format (s, "esp: spi %u seq %u crypto %U integrity %U",
-             t->spi, t->seq,
+  s = format (s, "cipher %U auth %U\n",
              format_ipsec_crypto_alg, t->crypto_alg,
              format_ipsec_integ_alg, t->integ_alg);
+
+  if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
+    {
+      s = format (s, "%U%U", format_white_space, indent,
+                 format_ip6_header, ih4);
+      offset = sizeof (ip6_header_t);
+    }
+  else
+    {
+      s = format (s, "%U%U", format_white_space, indent,
+                 format_ip4_header, ih4);
+      offset = ip4_header_bytes (ih4);
+    }
+
+  s = format (s, "\n%U%U", format_white_space, indent,
+             format_esp_header, t->packet_data + offset);
+
   return s;
 }
 
@@ -92,30 +111,31 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 thread_index = vlib_get_thread_index ();
+  u32 thread_idx = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  dpdk_esp_main_t *em = &dpdk_esp_main;
-  u32 i;
+  crypto_resource_t *res = 0;
+  ipsec_sa_t *sa0 = 0;
+  crypto_alg_t *cipher_alg = 0, *auth_alg = 0;
+  struct rte_cryptodev_sym_session *session = 0;
+  u32 ret, last_sa_index = ~0;
+  u8 numa = rte_socket_id ();
+  u8 is_aead = 0;
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_idx);
+  struct rte_crypto_op **ops = cwm->ops;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
 
-  crypto_worker_main_t *cwm =
-    vec_elt_at_index (dcm->workers_main, thread_index);
-  u32 n_qps = vec_len (cwm->qp_data);
-  struct rte_crypto_op **cops_to_enq[n_qps];
-  u32 n_cop_qp[n_qps], *bi_to_enq[n_qps];
-
-  for (i = 0; i < n_qps; i++)
+  ret = crypto_alloc_ops (numa, ops, n_left_from);
+  if (ret)
     {
-      bi_to_enq[i] = cwm->qp_data[i].bi;
-      cops_to_enq[i] = cwm->qp_data[i].cops;
+      vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
+                                  ESP_ENCRYPT_ERROR_DISCARD, 1);
+      /* Discard whole frame */
+      return n_left_from;
     }
 
-  memset (n_cop_qp, 0, n_qps * sizeof (u32));
-
-  crypto_alloc_cops ();
-
   next_index = ESP_ENCRYPT_NEXT_DROP;
 
   while (n_left_from > 0)
@@ -126,34 +146,99 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {
-         u32 bi0, next0;
+         clib_error_t *error;
+         u32 bi0;
          vlib_buffer_t *b0 = 0;
          u32 sa_index0;
-         ipsec_sa_t *sa0;
          ip4_and_esp_header_t *ih0, *oh0 = 0;
          ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
-         struct rte_mbuf *mb0 = 0;
+         esp_header_t *esp0;
          esp_footer_t *f0;
-         u8 is_ipv6;
-         u8 ip_hdr_size;
-         u8 next_hdr_type;
-         u8 transport_mode = 0;
-         const int BLOCK_SIZE = 16;
+         u8 is_ipv6, next_hdr_type;
          u32 iv_size;
          u16 orig_sz;
          u8 trunc_size;
-         crypto_sa_session_t *sa_sess;
-         void *sess;
-         struct rte_crypto_op *cop = 0;
-         u16 qp_index;
+         struct rte_mbuf *mb0 = 0;
+         struct rte_crypto_op *op;
+         u16 res_idx;
 
          bi0 = from[0];
          from += 1;
          n_left_from -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         ih0 = vlib_buffer_get_current (b0);
+         mb0 = rte_mbuf_from_vlib_buffer (b0);
+
+         /* ih0/ih6_0 */
+         CLIB_PREFETCH (ih0, sizeof (ih6_0[0]), LOAD);
+         /* f0 */
+         CLIB_PREFETCH (vlib_buffer_get_tail (b0), 20, STORE);
+         /* mb0 */
+         CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);
+
+         op = ops[0];
+         ops += 1;
+         ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
+
+         dpdk_op_priv_t *priv = crypto_op_get_priv (op);
+
+         u16 op_len =
+           sizeof (op[0]) + sizeof (op[0].sym[0]) + sizeof (priv[0]);
+         CLIB_PREFETCH (op, op_len, STORE);
+
          sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
-         sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+         if (sa_index0 != last_sa_index)
+           {
+             last_sa_index = sa_index0;
+
+             sa0 = pool_elt_at_index (im->sad, sa_index0);
+
+             cipher_alg =
+               vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
+             auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);
+
+#if DPDK_NO_AEAD
+             is_aead = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) ||
+                        (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) ||
+                        (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256));
+#else
+             is_aead = (cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD);
+#endif
+
+             if (is_aead)
+               auth_alg = cipher_alg;
+
+             res_idx = get_resource (cwm, sa0);
+
+             if (PREDICT_FALSE (res_idx == (u16) ~ 0))
+               {
+                 clib_warning ("SA not supported by thread index %u",
+                               thread_idx);
+                 vlib_node_increment_counter (vm,
+                                              dpdk_esp_encrypt_node.index,
+                                              ESP_ENCRYPT_ERROR_NOSUP, 1);
+                 to_next[0] = bi0;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 goto trace;
+               }
+             res = vec_elt_at_index (dcm->resource, res_idx);
+
+             error = crypto_get_session (&session, sa_index0, res, cwm, 1);
+             if (PREDICT_FALSE (error || !session))
+               {
+                 clib_warning ("failed to get crypto session");
+                 vlib_node_increment_counter (vm,
+                                              dpdk_esp_encrypt_node.index,
+                                              ESP_ENCRYPT_ERROR_SESSION, 1);
+                 to_next[0] = bi0;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 goto trace;
+               }
+           }
 
          if (PREDICT_FALSE (esp_seq_advance (sa0)))
            {
@@ -168,272 +253,232 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
              goto trace;
            }
 
-         sa0->total_data_size += b0->current_length;
-
-         sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0);
-         if (PREDICT_FALSE (!sa_sess->sess))
-           {
-             int ret = create_sym_sess (sa0, sa_sess, 1);
-
-             if (PREDICT_FALSE (ret))
-               {
-                 to_next[0] = bi0;
-                 to_next += 1;
-                 n_left_to_next -= 1;
-                 goto trace;
-               }
-           }
+         orig_sz = b0->current_length;
 
-         qp_index = sa_sess->qp_index;
-         sess = sa_sess->sess;
+         /* TODO multi-seg support - total_length_not_including_first_buffer */
+         sa0->total_data_size += b0->current_length;
 
-         ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
-         cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
-         ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
+         res->ops[res->n_ops] = op;
+         res->bi[res->n_ops] = bi0;
+         res->n_ops += 1;
 
-         cops_to_enq[qp_index][0] = cop;
-         cops_to_enq[qp_index] += 1;
-         n_cop_qp[qp_index] += 1;
-         bi_to_enq[qp_index][0] = bi0;
-         bi_to_enq[qp_index] += 1;
+         dpdk_gcm_cnt_blk *icb = &priv->cb;
 
-         ssize_t adv;
-         iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
-         if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
-           trunc_size = 16;
-         else
-           trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
+         crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi);
 
-         ih0 = vlib_buffer_get_current (b0);
-         orig_sz = b0->current_length;
          is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60;
-         /* is ipv6 */
-         if (PREDICT_TRUE (sa0->is_tunnel))
-           {
-             if (PREDICT_TRUE (!is_ipv6))
-               adv = -sizeof (ip4_and_esp_header_t);
-             else
-               adv = -sizeof (ip6_and_esp_header_t);
-           }
-         else
-           {
-             adv = -sizeof (esp_header_t);
-             if (PREDICT_TRUE (!is_ipv6))
-               orig_sz -= sizeof (ip4_header_t);
-             else
-               orig_sz -= sizeof (ip6_header_t);
-           }
-
-         /*transport mode save the eth header before it is overwritten */
-         if (PREDICT_FALSE (!sa0->is_tunnel))
-           {
-             ethernet_header_t *ieh0 = (ethernet_header_t *)
-               ((u8 *) vlib_buffer_get_current (b0) -
-                sizeof (ethernet_header_t));
-             ethernet_header_t *oeh0 =
-               (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size));
-             clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t));
-           }
-
-         vlib_buffer_advance (b0, adv - iv_size);
 
-         /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */
+         iv_size = cipher_alg->iv_len;
+         trunc_size = auth_alg->trunc_size;
 
-         /* is ipv6 */
-         if (PREDICT_FALSE (is_ipv6))
+         if (sa0->is_tunnel)
            {
-             ih6_0 = (ip6_and_esp_header_t *) ih0;
-             ip_hdr_size = sizeof (ip6_header_t);
-             oh6_0 = vlib_buffer_get_current (b0);
-
-             if (PREDICT_TRUE (sa0->is_tunnel))
+             if (!is_ipv6 && !sa0->is_tunnel_ip6)      /* ip4inip4 */
+               {
+                 /* in tunnel mode send it back to FIB */
+                 priv->next = DPDK_CRYPTO_INPUT_NEXT_IP4_LOOKUP;
+                 u8 adv =
+                   sizeof (ip4_header_t) + sizeof (esp_header_t) + iv_size;
+                 vlib_buffer_advance (b0, -adv);
+                 oh0 = vlib_buffer_get_current (b0);
+                 next_hdr_type = IP_PROTOCOL_IP_IN_IP;
+                 /*
+                  * oh0->ip4.ip_version_and_header_length = 0x45;
+                  * oh0->ip4.tos = ih0->ip4.tos;
+                  * oh0->ip4.fragment_id = 0;
+                  * oh0->ip4.flags_and_fragment_offset = 0;
+                  */
+                 oh0->ip4.checksum_data_64[0] =
+                   clib_host_to_net_u64 (0x45ULL << 56);
+                 /*
+                  * oh0->ip4.ttl = 254;
+                  * oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
+                  */
+                 oh0->ip4.checksum_data_32[2] =
+                   clib_host_to_net_u32 (0xfe320000);
+
+                 oh0->ip4.src_address.as_u32 =
+                   sa0->tunnel_src_addr.ip4.as_u32;
+                 oh0->ip4.dst_address.as_u32 =
+                   sa0->tunnel_dst_addr.ip4.as_u32;
+                 esp0 = &oh0->esp;
+                 oh0->esp.spi = clib_host_to_net_u32 (sa0->spi);
+                 oh0->esp.seq = clib_host_to_net_u32 (sa0->seq);
+               }
+             else if (is_ipv6 && sa0->is_tunnel_ip6)   /* ip6inip6 */
                {
+                 /* in tunnel mode send it back to FIB */
+                 priv->next = DPDK_CRYPTO_INPUT_NEXT_IP6_LOOKUP;
+
+                 u8 adv =
+                   sizeof (ip6_header_t) + sizeof (esp_header_t) + iv_size;
+                 vlib_buffer_advance (b0, -adv);
+                 ih6_0 = (ip6_and_esp_header_t *) ih0;
+                 oh6_0 = vlib_buffer_get_current (b0);
+
                  next_hdr_type = IP_PROTOCOL_IPV6;
+
                  oh6_0->ip6.ip_version_traffic_class_and_flow_label =
                    ih6_0->ip6.ip_version_traffic_class_and_flow_label;
+
+                 oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
+                 oh6_0->ip6.hop_limit = 254;
+                 oh6_0->ip6.src_address.as_u64[0] =
+                   sa0->tunnel_src_addr.ip6.as_u64[0];
+                 oh6_0->ip6.src_address.as_u64[1] =
+                   sa0->tunnel_src_addr.ip6.as_u64[1];
+                 oh6_0->ip6.dst_address.as_u64[0] =
+                   sa0->tunnel_dst_addr.ip6.as_u64[0];
+                 oh6_0->ip6.dst_address.as_u64[1] =
+                   sa0->tunnel_dst_addr.ip6.as_u64[1];
+                 esp0 = &oh6_0->esp;
+                 oh6_0->esp.spi = clib_host_to_net_u32 (sa0->spi);
+                 oh6_0->esp.seq = clib_host_to_net_u32 (sa0->seq);
                }
-             else
+             else              /* unsupported ip4inip6, ip6inip4 */
                {
-                 next_hdr_type = ih6_0->ip6.protocol;
-                 memmove (oh6_0, ih6_0, sizeof (ip6_header_t));
+                 vlib_node_increment_counter (vm,
+                                              dpdk_esp_encrypt_node.index,
+                                              ESP_ENCRYPT_ERROR_NOSUP, 1);
+                 to_next[0] = bi0;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 goto trace;
                }
-
-             oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
-             oh6_0->ip6.hop_limit = 254;
-             oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi);
-             oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+             vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
            }
-         else
+         else                  /* transport mode */
            {
-             ip_hdr_size = sizeof (ip4_header_t);
-             oh0 = vlib_buffer_get_current (b0);
-
-             if (PREDICT_TRUE (sa0->is_tunnel))
+             priv->next = DPDK_CRYPTO_INPUT_NEXT_INTERFACE_OUTPUT;
+             u16 rewrite_len = vnet_buffer (b0)->ip.save_rewrite_length;
+             u16 adv = sizeof (esp_header_t) + iv_size;
+             vlib_buffer_advance (b0, -rewrite_len - adv);
+             u8 *src = ((u8 *) ih0) - rewrite_len;
+             u8 *dst = vlib_buffer_get_current (b0);
+             oh0 = (ip4_and_esp_header_t *) (dst + rewrite_len);
+
+             if (is_ipv6)
                {
-                 next_hdr_type = IP_PROTOCOL_IP_IN_IP;
-                 oh0->ip4.tos = ih0->ip4.tos;
+                 orig_sz -= sizeof (ip6_header_t);
+                 ih6_0 = (ip6_and_esp_header_t *) ih0;
+                 next_hdr_type = ih6_0->ip6.protocol;
+                 memmove (dst, src, rewrite_len + sizeof (ip6_header_t));
+                 oh6_0 = (ip6_and_esp_header_t *) oh0;
+                 oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
+                 esp0 = &oh6_0->esp;
                }
-             else
+             else              /* ipv4 */
                {
+                 orig_sz -= ip4_header_bytes (&ih0->ip4);
                  next_hdr_type = ih0->ip4.protocol;
-                 memmove (oh0, ih0, sizeof (ip4_header_t));
+                 memmove (dst, src,
+                          rewrite_len + ip4_header_bytes (&ih0->ip4));
+                 oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
+                 esp0 = (esp_header_t *)
+                   (((u8 *) oh0) + ip4_header_bytes (&ih0->ip4));
                }
-
-             oh0->ip4.ip_version_and_header_length = 0x45;
-             oh0->ip4.fragment_id = 0;
-             oh0->ip4.flags_and_fragment_offset = 0;
-             oh0->ip4.ttl = 254;
-             oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
-             oh0->esp.spi = clib_net_to_host_u32 (sa0->spi);
-             oh0->esp.seq = clib_net_to_host_u32 (sa0->seq);
+             esp0->spi = clib_host_to_net_u32 (sa0->spi);
+             esp0->seq = clib_host_to_net_u32 (sa0->seq);
            }
 
-         if (PREDICT_TRUE
-             (!is_ipv6 && sa0->is_tunnel && !sa0->is_tunnel_ip6))
-           {
-             oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32;
-             oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32;
+         ASSERT (is_pow2 (cipher_alg->boundary));
+         u16 mask = cipher_alg->boundary - 1;
+         u16 pad_payload_len = ((orig_sz + 2) + mask) & ~mask;
+         u8 pad_bytes = pad_payload_len - 2 - orig_sz;
 
-             /* in tunnel mode send it back to FIB */
-             next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP;
-             vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-           }
-         else if (is_ipv6 && sa0->is_tunnel && sa0->is_tunnel_ip6)
+         u8 *padding =
+           vlib_buffer_put_uninit (b0, pad_bytes + 2 + trunc_size);
+
+         if (pad_bytes)
+           clib_memcpy (padding, pad_data, 16);
+
+         f0 = (esp_footer_t *) (padding + pad_bytes);
+         f0->pad_length = pad_bytes;
+         f0->next_header = next_hdr_type;
+
+         if (is_ipv6)
            {
-             oh6_0->ip6.src_address.as_u64[0] =
-               sa0->tunnel_src_addr.ip6.as_u64[0];
-             oh6_0->ip6.src_address.as_u64[1] =
-               sa0->tunnel_src_addr.ip6.as_u64[1];
-             oh6_0->ip6.dst_address.as_u64[0] =
-               sa0->tunnel_dst_addr.ip6.as_u64[0];
-             oh6_0->ip6.dst_address.as_u64[1] =
-               sa0->tunnel_dst_addr.ip6.as_u64[1];
-
-             /* in tunnel mode send it back to FIB */
-             next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP;
-             vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+             u16 len = b0->current_length - sizeof (ip6_header_t);
+             oh6_0->ip6.payload_length = clib_host_to_net_u16 (len);
            }
          else
            {
-             next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT;
-             transport_mode = 1;
+             oh0->ip4.length = clib_host_to_net_u16 (b0->current_length);
+             oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
            }
 
-         int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE;
-
-         /* pad packet in input buffer */
-         u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz;
-         u8 i;
-         u8 *padding = vlib_buffer_get_current (b0) + b0->current_length;
-
-         for (i = 0; i < pad_bytes; ++i)
-           padding[i] = i + 1;
-
-         f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes;
-         f0->pad_length = pad_bytes;
-         f0->next_header = next_hdr_type;
-         b0->current_length += pad_bytes + 2 + trunc_size;
-
          vnet_buffer (b0)->sw_if_index[VLIB_RX] =
            vnet_buffer (b0)->sw_if_index[VLIB_RX];
          b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
 
-         struct rte_crypto_sym_op *sym_cop;
-         sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
+         /* mbuf packet starts at ESP header */
+         mb0->data_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
+         mb0->pkt_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
+         mb0->data_off = ((void *) esp0) - mb0->buf_addr;
 
-         dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1);
-
-         vnet_buffer (b0)->unused[0] = next0;
-
-         mb0 = rte_mbuf_from_vlib_buffer (b0);
-         mb0->data_len = b0->current_length;
-         mb0->pkt_len = b0->current_length;
-         mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
-
-         dpdk_gcm_cnt_blk *icb = &priv->cb;
-
-         crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi);
-
-         u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128;
          u32 cipher_off, cipher_len;
-         u32 auth_off = 0, auth_len = 0, aad_size = 0;
-         u8 *aad = NULL, *digest = NULL;
+         u32 auth_len = 0, aad_size = 0;
+         u32 *aad = NULL;
+         u8 *digest = vlib_buffer_get_tail (b0) - trunc_size;
 
-         if (is_aead)
+         if (cipher_alg->alg == RTE_CRYPTO_CIPHER_AES_CBC)
            {
-             u32 *esp_iv =
-               (u32 *) (b0->data + b0->current_data + ip_hdr_size +
-                        sizeof (esp_header_t));
+             cipher_off = sizeof (esp_header_t);
+             cipher_len = iv_size + pad_payload_len;
+           }
+         else                  /* CTR/GCM */
+           {
+             u32 *esp_iv = (u32 *) (esp0 + 1);
              esp_iv[0] = sa0->seq;
              esp_iv[1] = sa0->seq_hi;
 
-             cipher_off = ip_hdr_size + sizeof (esp_header_t) + iv_size;
-             cipher_len = BLOCK_SIZE * blocks;
-             iv_size = 16;     /* GCM IV size, not ESP IV size */
+             cipher_off = sizeof (esp_header_t) + iv_size;
+             cipher_len = pad_payload_len;
+
+             iv_size = 12;     /* CTR/GCM IV size, not ESP IV size */
+           }
+
+         if (is_aead)
+           {
+             aad = (u32 *) priv->aad;
+             aad[0] = clib_host_to_net_u32 (sa0->spi);
+             aad[1] = clib_host_to_net_u32 (sa0->seq);
 
-             aad = priv->aad;
-             clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size,
-                          8);
-             aad_size = 8;
-             if (PREDICT_FALSE (sa0->use_esn))
+             if (sa0->use_esn)
                {
-                 *((u32 *) & aad[8]) = sa0->seq_hi;
+                 aad[2] = clib_host_to_net_u32 (sa0->seq_hi);
                  aad_size = 12;
                }
-
-             digest =
-               vlib_buffer_get_current (b0) + b0->current_length -
-               trunc_size;
+             else
+               aad_size = 8;
            }
          else
            {
-             cipher_off = ip_hdr_size + sizeof (esp_header_t);
-             cipher_len = BLOCK_SIZE * blocks + iv_size;
-
-             auth_off = ip_hdr_size;
-             auth_len = b0->current_length - ip_hdr_size - trunc_size;
-
-             digest =
-               vlib_buffer_get_current (b0) + b0->current_length -
-               trunc_size;
-
-             if (PREDICT_FALSE (sa0->use_esn))
+             auth_len =
+               vlib_buffer_get_tail (b0) - ((u8 *) esp0) - trunc_size;
+             if (sa0->use_esn)
                {
                  *((u32 *) digest) = sa0->seq_hi;
-                 auth_len += sizeof (sa0->seq_hi);
+                 auth_len += 4;
                }
            }
 
-         crypto_op_setup (is_aead, mb0, cop, sess,
+         crypto_op_setup (is_aead, mb0, op, session,
                           cipher_off, cipher_len, (u8 *) icb, iv_size,
-                          auth_off, auth_len, aad, aad_size,
+                          0, auth_len, (u8 *) aad, aad_size,
                           digest, 0, trunc_size);
 
-         if (PREDICT_FALSE (is_ipv6))
-           {
-             oh6_0->ip6.payload_length =
-               clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
-                                     sizeof (ip6_header_t));
-           }
-         else
-           {
-             oh0->ip4.length =
-               clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
-             oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
-           }
-
-         if (transport_mode)
-           vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
-
        trace:
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              esp_encrypt_trace_t *tr =
                vlib_add_trace (vm, node, b0, sizeof (*tr));
-             tr->spi = sa0->spi;
-             tr->seq = sa0->seq - 1;
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
+             u8 *p = vlib_buffer_get_current (b0);
+             if (!sa0->is_tunnel)
+               p += vnet_buffer (b0)->ip.save_rewrite_length;
+             clib_memcpy (tr->packet_data, p, sizeof (tr->packet_data));
            }
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -441,31 +486,11 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
   vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
                               ESP_ENCRYPT_ERROR_RX_PKTS,
                               from_frame->n_vectors);
-  crypto_qp_data_t *qpd;
-  /* *INDENT-OFF* */
-  vec_foreach_index (i, cwm->qp_data)
-    {
-      u32 enq;
-
-      if (!n_cop_qp[i])
-       continue;
 
-      qpd = vec_elt_at_index(cwm->qp_data, i);
-      enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
-                                       qpd->cops, n_cop_qp[i]);
-      qpd->inflights += enq;
+  crypto_enqueue_ops (vm, cwm, 1, dpdk_esp_encrypt_node.index,
+                     ESP_ENCRYPT_ERROR_ENQ_FAIL, numa);
 
-      if (PREDICT_FALSE(enq < n_cop_qp[i]))
-       {
-         crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
-         vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
-
-          vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
-                                      ESP_ENCRYPT_ERROR_ENQ_FAIL,
-                                      n_cop_qp[i] - enq);
-        }
-    }
-  /* *INDENT-ON* */
+  crypto_free_ops (numa, ops, cwm->ops + from_frame->n_vectors - ops);
 
   return from_frame->n_vectors;
 }
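
A worked check of the trailer arithmetic above, with assumed parameters (AES-CBC-128, SHA1-96, a 60-byte inner payload):

    /* boundary = 16, so mask = 15; trunc_size = 12; orig_sz = 60:
     *   pad_payload_len = ((60 + 2) + 15) & ~15 = 64  (payload + pad + 2B footer)
     *   pad_bytes       = 64 - 2 - 60           = 2
     *   bytes appended  = pad_bytes + 2 + trunc_size = 2 + 2 + 12 = 16
     * giving the on-wire ESP layout:
     *   [ESP hdr 8][IV 16][payload 60][pad 2][pad_length 1][next_header 1][ICV 12]
     */
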
@@ -488,103 +513,6 @@ VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = {
 /* *INDENT-ON* */
 
 VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn)
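
The dpdk-esp-encrypt-post node removed below is obsolete because the next node is now stashed in the op private area (priv->next, set per packet above) and honoured when completed ops are dequeued. A rough sketch of the dequeue side, under the assumption that dpdk-crypto-input works the way the priv->next assignments imply (dev_id, qp_id and vm are left free here):

    /* Sketch: dispatch on dequeue instead of a dedicated post node. */
    struct rte_crypto_op *ops[VLIB_FRAME_SIZE];
    u32 bi[VLIB_FRAME_SIZE];
    u16 next[VLIB_FRAME_SIZE];
    u16 n_deq, i;

    n_deq = rte_cryptodev_dequeue_burst (dev_id, qp_id, ops, VLIB_FRAME_SIZE);
    for (i = 0; i < n_deq; i++)
      {
        dpdk_op_priv_t *priv = crypto_op_get_priv (ops[i]);
        vlib_buffer_t *b0 = vlib_buffer_from_rte_mbuf (ops[i]->sym->m_src);

        bi[i] = vlib_get_buffer_index (vm, b0);
        next[i] = priv->next;   /* e.g. DPDK_CRYPTO_INPUT_NEXT_IP4_LOOKUP */
      }
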
-/*
- * ESP Encrypt Post Node
- */
-#define foreach_esp_encrypt_post_error              \
- _(PKTS, "ESP post pkts")
-     typedef enum
-     {
-#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym,
-       foreach_esp_encrypt_post_error
-#undef _
-        ESP_ENCRYPT_POST_N_ERROR,
-     } esp_encrypt_post_error_t;
-
-     static char *esp_encrypt_post_error_strings[] = {
-#define _(sym,string) string,
-       foreach_esp_encrypt_post_error
-#undef _
-     };
-
-vlib_node_registration_t dpdk_esp_encrypt_post_node;
-
-static u8 *
-format_esp_encrypt_post_trace (u8 * s, va_list * args)
-{
-  return s;
-}
-
-static uword
-dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm,
-                              vlib_node_runtime_t * node,
-                              vlib_frame_t * from_frame)
-{
-  u32 n_left_from, *from, *to_next = 0, next_index;
-
-  from = vlib_frame_vector_args (from_frame);
-  n_left_from = from_frame->n_vectors;
-
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, next0;
-         vlib_buffer_t *b0 = 0;
-
-         bi0 = from[0];
-         from += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-
-         to_next[0] = bi0;
-         to_next += 1;
-
-         next0 = vnet_buffer (b0)->unused[0];
-
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next, bi0,
-                                          next0);
-       }
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index,
-                              ESP_ENCRYPT_POST_ERROR_PKTS,
-                              from_frame->n_vectors);
-
-  return from_frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = {
-  .function = dpdk_esp_encrypt_post_node_fn,
-  .name = "dpdk-esp-encrypt-post",
-  .vector_size = sizeof (u32),
-  .format_trace = format_esp_encrypt_post_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  .n_errors = ARRAY_LEN (esp_encrypt_post_error_strings),
-  .error_strings = esp_encrypt_post_error_strings,
-  .n_next_nodes = ESP_ENCRYPT_N_NEXT,
-  .next_nodes =
-    {
-#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
-      foreach_esp_encrypt_next
-#undef _
-    }
-};
-/* *INDENT-ON* */
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node,
-                             dpdk_esp_encrypt_post_node_fn)
 /*
  * fd.io coding-style-patch-verification: ON
  *
index c9fce3d..2fd331c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
 
 #include <dpdk/device/dpdk.h>
 #include <dpdk/ipsec/ipsec.h>
-#include <dpdk/ipsec/esp.h>
 
-#define DPDK_CRYPTO_NB_SESS_OBJS  20000
-#define DPDK_CRYPTO_CACHE_SIZE   512
-#define DPDK_CRYPTO_PRIV_SIZE    128
-#define DPDK_CRYPTO_N_QUEUE_DESC  1024
-#define DPDK_CRYPTO_NB_COPS      (1024 * 4)
+#define EMPTY_STRUCT {0}
 
-static int
-add_del_sa_sess (u32 sa_index, u8 is_add)
+static void
+algos_init (u32 n_mains)
 {
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm;
-  u8 skip_master = vlib_num_workers () > 0;
+  crypto_alg_t *a;
+
+  vec_validate_aligned (dcm->cipher_algs, IPSEC_CRYPTO_N_ALG - 1, 8);
+
+  {
+#define _(v,f,str) \
+  dcm->cipher_algs[IPSEC_CRYPTO_ALG_##f].name = str; \
+  dcm->cipher_algs[IPSEC_CRYPTO_ALG_##f].disabled = n_mains;
+    foreach_ipsec_crypto_alg
+#undef _
+  }
+
+  /* Minimum boundary for ciphers is 4B, required by ESP */
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_NONE];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_NULL;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 0;
+  a->iv_len = 0;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CBC_128];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CBC;
+  a->boundary = 16;
+  a->key_len = 16;
+  a->iv_len = 16;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CBC_192];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CBC;
+  a->boundary = 16;
+  a->key_len = 24;
+  a->iv_len = 16;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CBC_256];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CBC;
+  a->boundary = 16;
+  a->key_len = 32;
+  a->iv_len = 16;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CTR_128];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CTR;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 16;
+  a->iv_len = 8;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CTR_192];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CTR;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 24;
+  a->iv_len = 8;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_CTR_256];
+  a->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  a->alg = RTE_CRYPTO_CIPHER_AES_CTR;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 32;
+  a->iv_len = 8;
 
-  /* *INDENT-OFF* */
-  vec_foreach (cwm, dcm->workers_main)
-    {
-      crypto_sa_session_t *sa_sess;
-      u8 is_outbound;
+#if DPDK_NO_AEAD
+#define AES_GCM_TYPE RTE_CRYPTO_SYM_XFORM_CIPHER
+#define AES_GCM_ALG RTE_CRYPTO_CIPHER_AES_GCM
+#else
+#define AES_GCM_TYPE RTE_CRYPTO_SYM_XFORM_AEAD
+#define AES_GCM_ALG RTE_CRYPTO_AEAD_AES_GCM
+#endif
 
-      if (skip_master)
-       {
-         skip_master = 0;
-         continue;
-       }
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_GCM_128];
+  a->type = AES_GCM_TYPE;
+  a->alg = AES_GCM_ALG;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 16;
+  a->iv_len = 8;
+  a->trunc_size = 16;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_GCM_192];
+  a->type = AES_GCM_TYPE;
+  a->alg = AES_GCM_ALG;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 24;
+  a->iv_len = 8;
+  a->trunc_size = 16;
+
+  a = &dcm->cipher_algs[IPSEC_CRYPTO_ALG_AES_GCM_256];
+  a->type = AES_GCM_TYPE;
+  a->alg = AES_GCM_ALG;
+  a->boundary = 4;             /* 1 */
+  a->key_len = 32;
+  a->iv_len = 8;
+  a->trunc_size = 16;
+
+  vec_validate (dcm->auth_algs, IPSEC_INTEG_N_ALG - 1);
+
+  {
+#define _(v,f,str) \
+  dcm->auth_algs[IPSEC_INTEG_ALG_##f].name = str; \
+  dcm->auth_algs[IPSEC_INTEG_ALG_##f].disabled = n_mains;
+    foreach_ipsec_integ_alg
+#undef _
+  }
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_NONE];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_NULL;
+  a->key_len = 0;
+  a->trunc_size = 0;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_MD5_96];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_MD5_HMAC;
+  a->key_len = 16;
+  a->trunc_size = 12;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_SHA1_96];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_SHA1_HMAC;
+  a->key_len = 20;
+  a->trunc_size = 12;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_SHA_256_96];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_SHA256_HMAC;
+  a->key_len = 32;
+  a->trunc_size = 12;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_SHA_256_128];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_SHA256_HMAC;
+  a->key_len = 32;
+  a->trunc_size = 16;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_SHA_384_192];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_SHA384_HMAC;
+  a->key_len = 48;
+  a->trunc_size = 24;
+
+  a = &dcm->auth_algs[IPSEC_INTEG_ALG_SHA_512_256];
+  a->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  a->alg = RTE_CRYPTO_AUTH_SHA512_HMAC;
+  a->key_len = 64;
+  a->trunc_size = 32;
+}
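
algos_init() gives the data-path nodes a single table to index by sa->crypto_alg / sa->integ_alg, which is what replaces the hardcoded BLOCK_SIZE and per-algorithm special cases deleted from the ESP nodes. A minimal lookup sketch using the entries built above (dcm and sa assumed in scope):

    /* Sketch: per-SA parameters are now table lookups, not special cases. */
    crypto_alg_t *cipher = vec_elt_at_index (dcm->cipher_algs, sa->crypto_alg);
    crypto_alg_t *auth = vec_elt_at_index (dcm->auth_algs, sa->integ_alg);

    u32 iv_size = cipher->iv_len;     /* 16 for AES-CBC, 8 for AES-CTR/GCM */
    u8 icv_size = auth->trunc_size;   /* e.g. 12 for SHA1-96 */
    u16 mask = cipher->boundary - 1;  /* 16B blocks for CBC, 4B minimum else */
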
 
-      for (is_outbound = 0; is_outbound < 2; is_outbound++)
-       {
-         if (is_add)
-           {
-             pool_get (cwm->sa_sess_d[is_outbound], sa_sess);
-           }
-         else
-           {
-             u8 dev_id;
-             i32 ret;
+static u8
+cipher_alg_index (const crypto_alg_t * alg)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
 
-             sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index);
-             dev_id = cwm->qp_data[sa_sess->qp_index].dev_id;
+  return (alg - dcm->cipher_algs);
+}
 
-             if (!sa_sess->sess)
-               continue;
-#if DPDK_NO_AEAD
-             ret = (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess) == NULL);
-             ASSERT (ret);
-#else
-             ret = rte_cryptodev_sym_session_clear(dev_id, sa_sess->sess);
-             ASSERT (!ret);
+static u8
+auth_alg_index (const crypto_alg_t * alg)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+
+  return (alg - dcm->auth_algs);
+}
+
+static crypto_alg_t *
+cipher_cap_to_alg (const struct rte_cryptodev_capabilities *cap, u8 key_len)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *alg;
+
+  if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+    return NULL;
 
-             ret = rte_cryptodev_sym_session_free(sa_sess->sess);
-             ASSERT (!ret);
+  /* *INDENT-OFF* */
+  vec_foreach (alg, dcm->cipher_algs)
+    {
+      if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER) &&
+         (alg->type == RTE_CRYPTO_SYM_XFORM_CIPHER) &&
+         (cap->sym.cipher.algo == alg->alg) &&
+         (alg->key_len == key_len))
+       return alg;
+#if ! DPDK_NO_AEAD
+      if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) &&
+         (alg->type == RTE_CRYPTO_SYM_XFORM_AEAD) &&
+         (cap->sym.aead.algo == alg->alg) &&
+         (alg->key_len == key_len))
+       return alg;
 #endif
-             memset(sa_sess, 0, sizeof(sa_sess[0]));
-           }
-       }
     }
-  /* *INDENT-OFF* */
+  /* *INDENT-ON* */
 
-  return 0;
+  return NULL;
 }
 
-static void
-update_qp_data (crypto_worker_main_t * cwm,
-               u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx)
+static crypto_alg_t *
+auth_cap_to_alg (const struct rte_cryptodev_capabilities *cap, u8 trunc_size)
 {
-  crypto_qp_data_t *qpd;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *alg;
+
+  if ((cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) ||
+      (cap->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH))
+    return NULL;
 
   /* *INDENT-OFF* */
-  vec_foreach_index (*idx, cwm->qp_data)
+  vec_foreach (alg, dcm->auth_algs)
     {
-      qpd = vec_elt_at_index(cwm->qp_data, *idx);
-
-      if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id &&
-         qpd->is_outbound == is_outbound)
-         return;
+      if ((cap->sym.auth.algo == alg->alg) &&
+         (alg->trunc_size == trunc_size))
+       return alg;
     }
   /* *INDENT-ON* */
 
-  vec_add2_aligned (cwm->qp_data, qpd, 1, CLIB_CACHE_LINE_BYTES);
+  return NULL;
+}
+
+#if ! DPDK_NO_AEAD
+static void
+crypto_set_aead_xform (struct rte_crypto_sym_xform *xform,
+                      ipsec_sa_t * sa, u8 is_outbound)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *c;
+
+  c = vec_elt_at_index (dcm->cipher_algs, sa->crypto_alg);
+
+  ASSERT (c->type == RTE_CRYPTO_SYM_XFORM_AEAD);
+
+  xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
+  xform->aead.algo = c->alg;
+  xform->aead.key.data = sa->crypto_key;
+  xform->aead.key.length = c->key_len;
+  xform->aead.iv.offset =
+    crypto_op_get_priv_offset () + offsetof (dpdk_op_priv_t, cb);
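+  /* GCM nonce = 4-byte salt + 8-byte explicit IV (12 bytes total) */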
+  xform->aead.iv.length = 12;
+  xform->aead.digest_length = c->trunc_size;
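+  /* RFC 4106 AAD: SPI + seq (8 bytes), or SPI + 64-bit ESN (12 bytes) */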
+  xform->aead.aad_length = sa->use_esn ? 12 : 8;
+  xform->next = NULL;
 
-  qpd->dev_id = cdev_id;
-  qpd->qp_id = qp_id;
-  qpd->is_outbound = is_outbound;
+  if (is_outbound)
+    xform->aead.op = RTE_CRYPTO_AEAD_OP_ENCRYPT;
+  else
+    xform->aead.op = RTE_CRYPTO_AEAD_OP_DECRYPT;
 }
+#endif
 
-/*
- * return:
- *     0: already exist
- *     1: mapped
- */
-static int
-add_mapping (crypto_worker_main_t * cwm,
-            u8 cdev_id, u16 qp, u8 is_outbound,
-            const struct rte_cryptodev_capabilities *cipher_cap,
-            const struct rte_cryptodev_capabilities *auth_cap)
+static void
+crypto_set_cipher_xform (struct rte_crypto_sym_xform *xform,
+                        ipsec_sa_t * sa, u8 is_outbound)
 {
-  u16 qp_index;
-  uword key = 0, data, *ret;
-  crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *c;
 
-  p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo;
-  p_key->auth_algo = (u8) auth_cap->sym.auth.algo;
-  p_key->is_outbound = is_outbound;
+  c = vec_elt_at_index (dcm->cipher_algs, sa->crypto_alg);
+
+  ASSERT (c->type == RTE_CRYPTO_SYM_XFORM_CIPHER);
+
+  xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+  xform->cipher.algo = c->alg;
+  xform->cipher.key.data = sa->crypto_key;
+  xform->cipher.key.length = c->key_len;
 #if ! DPDK_NO_AEAD
-  p_key->is_aead = cipher_cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD;
+  xform->cipher.iv.offset =
+    crypto_op_get_priv_offset () + offsetof (dpdk_op_priv_t, cb);
+  xform->cipher.iv.length = c->iv_len;
 #endif
+  xform->next = NULL;
 
-  ret = hash_get (cwm->algo_qp_map, key);
-  if (ret)
-    return 0;
+  if (is_outbound)
+    xform->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+  else
+    xform->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+}
 
-  update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index);
+static void
+crypto_set_auth_xform (struct rte_crypto_sym_xform *xform,
+                      ipsec_sa_t * sa, u8 is_outbound)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *a;
+
+  a = vec_elt_at_index (dcm->auth_algs, sa->integ_alg);
 
-  data = (uword) qp_index;
-  hash_set (cwm->algo_qp_map, key, data);
+  ASSERT (a->type == RTE_CRYPTO_SYM_XFORM_AUTH);
+
+  xform->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+  xform->auth.algo = a->alg;
+  xform->auth.key.data = sa->integ_key;
+  xform->auth.key.length = a->key_len;
+  xform->auth.digest_length = a->trunc_size;
+#if DPDK_NO_AEAD
+  if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128 ||
+      sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192 ||
+      sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256)
+    xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM;
+  xform->auth.add_auth_data_length = sa->use_esn ? 12 : 8;
+#endif
+  xform->next = NULL;
 
-  return 1;
+  if (is_outbound)
+    xform->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
+  else
+    xform->auth.op = RTE_CRYPTO_AUTH_OP_VERIFY;
 }
 
-/*
- * return:
- *     0: already exist
- *     1: mapped
- */
-static int
-add_cdev_mapping (crypto_worker_main_t * cwm,
-                 struct rte_cryptodev_info *dev_info, u8 cdev_id,
-                 u16 qp, u8 is_outbound)
+clib_error_t *
+create_sym_session (struct rte_cryptodev_sym_session **session,
+                   u32 sa_idx,
+                   crypto_resource_t * res,
+                   crypto_worker_main_t * cwm, u8 is_outbound)
 {
-  const struct rte_cryptodev_capabilities *i, *j;
-  u32 mapped = 0;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  ipsec_main_t *im = &ipsec_main;
+  crypto_data_t *data;
+  ipsec_sa_t *sa;
+  struct rte_crypto_sym_xform cipher_xform = { 0 };
+  struct rte_crypto_sym_xform auth_xform = { 0 };
+  struct rte_crypto_sym_xform *xfs;
+  crypto_session_key_t key = { 0 };
+
+  key.drv_id = res->drv_id;
+  key.sa_idx = sa_idx;
+
+  sa = pool_elt_at_index (im->sad, sa_idx);
 
-  for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++)
-    {
 #if ! DPDK_NO_AEAD
-      if (i->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD)
+  if ((sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) |
+      (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) |
+      (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256))
+    {
+      crypto_set_aead_xform (&cipher_xform, sa, is_outbound);
+      xfs = &cipher_xform;
+    }
+  else
+#endif /* ! DPDK_NO_AEAD */
+    {
+      crypto_set_cipher_xform (&cipher_xform, sa, is_outbound);
+      crypto_set_auth_xform (&auth_xform, sa, is_outbound);
+
+      if (is_outbound)
+       {
+         cipher_xform.next = &auth_xform;
+         xfs = &cipher_xform;
+       }
+      else
        {
-         struct rte_cryptodev_capabilities none = { 0 };
+         auth_xform.next = &cipher_xform;
+         xfs = &auth_xform;
+       }
+    }
 
-         if (check_algo_is_supported (i, NULL) != 0)
-           continue;
+  data = vec_elt_at_index (dcm->data, res->numa);
+
+#if DPDK_NO_AEAD
+  /*
+   * DPDK_VER <= 1705:
+   *   Each worker/thread has its own session per device driver
+   */
+  session[0] = rte_cryptodev_sym_session_create (res->dev_id, xfs);
+  if (!session[0])
+    {
+      data->session_drv_failed[res->drv_id] += 1;
+      return clib_error_return (0, "failed to create session for dev %u",
+                               res->dev_id);
+    }
+#else
+  /*
+   * DPDK_VER >= 1708:
+   *   Multiple worker/threads share the session for an SA
+   *   Single session per SA, initialized for each device driver
+   */
+  uword *val = hash_get (data->session_by_sa_index, sa_idx);
+  session[0] = val ? (struct rte_cryptodev_sym_session *) val[0] : NULL;
+
+  if (!session[0])
+    {
+      session[0] = rte_cryptodev_sym_session_create (data->session_h);
+      if (!session[0])
+       {
+         data->session_h_failed += 1;
+         return clib_error_return (0, "failed to create session header");
+       }
+      hash_set (data->session_by_sa_index, sa_idx, session[0]);
+    }
 
-         none.sym.auth.algo = RTE_CRYPTO_AUTH_NULL;
+  struct rte_mempool **mp;
+  mp = vec_elt_at_index (data->session_drv, res->drv_id);
+  ASSERT (mp[0] != NULL);
 
-         mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, &none);
-         continue;
+  i32 ret =
+    rte_cryptodev_sym_session_init (res->dev_id, session[0], xfs, mp[0]);
+  if (ret)
+    {
+      data->session_drv_failed[res->drv_id] += 1;
+      return clib_error_return (0, "failed to init session for drv %u",
+                               res->drv_id);
+    }
+#endif /* DPDK_NO_AEAD */
+
+  hash_set (cwm->session_by_drv_id_and_sa_index, key.val, session[0]);
+
+  return 0;
+}
+
+static void __attribute__ ((unused)) clear_and_free_obj (void *obj)
+{
+  struct rte_mempool *mp = rte_mempool_from_obj (obj);
+
+  memset (obj, 0, mp->elt_size);
+
+  rte_mempool_put (mp, obj);
+}
+
+#if ! DPDK_NO_AEAD
+/* This is from rte_cryptodev_pmd.h */
+static inline void *
+get_session_private_data (const struct rte_cryptodev_sym_session *sess,
+                         uint8_t driver_id)
+{
+  return sess->sess_private_data[driver_id];
+}
+
+/* This is from rte_cryptodev_pmd.h */
+static inline void
+set_session_private_data (struct rte_cryptodev_sym_session *sess,
+                         uint8_t driver_id, void *private_data)
+{
+  sess->sess_private_data[driver_id] = private_data;
+}
+#endif
+
+static clib_error_t *
+add_del_sa_session (u32 sa_index, u8 is_add)
+{
+  ipsec_main_t *im = &ipsec_main;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_worker_main_t *cwm;
+  struct rte_cryptodev_sym_session *s;
+  crypto_session_key_t key = { 0 };
+  uword *val;
+  u32 drv_id;
+  i32 ret;
+
+  key.sa_idx = sa_index;
+
+  if (is_add)
+    {
+#if 1
+      ipsec_sa_t *sa = pool_elt_at_index (im->sad, sa_index);
+      u32 seed;
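+      /* RFC 4106: for GCM the salt is the last 4 bytes of the key;
+       * other algos get a random salt */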
+      switch (sa->crypto_alg)
+       {
+       case IPSEC_CRYPTO_ALG_AES_GCM_128:
+       case IPSEC_CRYPTO_ALG_AES_GCM_192:
+       case IPSEC_CRYPTO_ALG_AES_GCM_256:
+         clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len - 4], 4);
+         break;
+       default:
+         seed = (u32) clib_cpu_time_now ();
+         sa->salt = random_u32 (&seed);
        }
 #endif
-      if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER)
-       continue;
+      return 0;
+    }
 
-      if (check_algo_is_supported (i, NULL) != 0)
-       continue;
+  /* XXX Wait briefly so in-flight ops using the session can drain; a
+   * per-worker/thread refcount at the SA level would be more robust */
+  unix_sleep (0.2);
 
-      for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED;
-          j++)
+  /* *INDENT-OFF* */
+  vec_foreach (cwm, dcm->workers_main)
+    {
+      for (drv_id = 0; drv_id < dcm->max_drv_id; drv_id++)
        {
-         if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH)
-           continue;
+         key.drv_id = drv_id;
+         val = hash_get (cwm->session_by_drv_id_and_sa_index, key.val);
+	  s = val ? (struct rte_cryptodev_sym_session *) val[0] : NULL;
 
-         if (check_algo_is_supported (j, NULL) != 0)
+         if (!s)
            continue;
 
-         mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j);
+#if DPDK_NO_AEAD
+         ret = (rte_cryptodev_sym_session_free (s->dev_id, s) == NULL);
+         ASSERT (ret);
+#endif
+         hash_unset (cwm->session_by_drv_id_and_sa_index, key.val);
        }
     }
+  /* *INDENT-ON* */
 
-  return mapped;
-}
+#if ! DPDK_NO_AEAD
+  crypto_data_t *data;
+  /* *INDENT-OFF* */
+  vec_foreach (data, dcm->data)
+    {
+      val = hash_get (data->session_by_sa_index, sa_index);
+      s = val ? (struct rte_cryptodev_sym_session *) val[0] : NULL;
 
-static int
-check_cryptodev_queues ()
-{
-  u32 n_qs = 0;
-  u8 cdev_id;
-  u32 n_req_qs = 2;
+      if (!s)
+       continue;
 
-  if (vlib_num_workers () > 0)
-    n_req_qs = vlib_num_workers () * 2;
+      hash_unset (data->session_by_sa_index, sa_index);
 
-  for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++)
-    {
-      struct rte_cryptodev_info cdev_info;
+      void *drv_session;
+      vec_foreach_index (drv_id, dcm->drv)
+       {
+         drv_session = get_session_private_data (s, drv_id);
+         if (!drv_session)
+           continue;
 
-      rte_cryptodev_info_get (cdev_id, &cdev_info);
+	  /*
+	   * Custom clear: rte_cryptodev_sym_session_clear (dev_id, ...)
+	   * would require looking up a dev_id for this drv_id first
+	   */
+         clear_and_free_obj (drv_session);
 
-      if (!
-         (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
-       continue;
+         set_session_private_data (s, drv_id, NULL);
+       }
 
-      n_qs += cdev_info.max_nb_queue_pairs;
+      ret = rte_cryptodev_sym_session_free (s);
+      ASSERT (!ret);
     }
+  /* *INDENT-ON* */
+#endif
 
-  if (n_qs >= n_req_qs)
-    return 0;
-  else
-    return -1;
+  return 0;
 }
 
 static clib_error_t *
 dpdk_ipsec_check_support (ipsec_sa_t * sa)
 {
-  if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+
+  if (sa->integ_alg == IPSEC_INTEG_ALG_NONE)
+    switch (sa->crypto_alg)
+      {
+      case IPSEC_CRYPTO_ALG_AES_GCM_128:
+      case IPSEC_CRYPTO_ALG_AES_GCM_192:
+      case IPSEC_CRYPTO_ALG_AES_GCM_256:
+       break;
+      default:
+       return clib_error_return (0, "unsupported integ-alg %U crypto-alg %U",
+                                 format_ipsec_integ_alg, sa->integ_alg,
+                                 format_ipsec_crypto_alg, sa->crypto_alg);
+      }
+
+  /* XXX do we need the NONE check? */
+  if (sa->crypto_alg != IPSEC_CRYPTO_ALG_NONE &&
+      dcm->cipher_algs[sa->crypto_alg].disabled)
+    return clib_error_return (0, "disabled crypto-alg %U",
+                             format_ipsec_crypto_alg, sa->crypto_alg);
+
+  /* XXX do we need the NONE check? */
+  if (sa->integ_alg != IPSEC_INTEG_ALG_NONE &&
+      dcm->auth_algs[sa->integ_alg].disabled)
+    return clib_error_return (0, "disabled integ-alg %U",
+                             format_ipsec_integ_alg, sa->integ_alg);
+  return NULL;
+}
+
+static void
+crypto_parse_capabilities (crypto_dev_t * dev,
+                          const struct rte_cryptodev_capabilities *cap,
+                          u32 n_mains)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_alg_t *alg;
+  u8 len, inc;
+
+  for (; cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; cap++)
     {
-      if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
-       return clib_error_return (0, "unsupported integ-alg %U with "
-                                 "crypto-alg aes-gcm-128",
-                                 format_ipsec_integ_alg, sa->integ_alg);
-#if DPDK_NO_AEAD
-      sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128;
+      /* A single capability can map to multiple algorithm entries,
+       * one per key/digest size */
+      switch (cap->sym.xform_type)
+       {
+#if ! DPDK_NO_AEAD
+       case RTE_CRYPTO_SYM_XFORM_AEAD:
 #endif
+       case RTE_CRYPTO_SYM_XFORM_CIPHER:
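+	  /* AEAD falls through: aead.key_size and cipher.key_size are
+	   * assumed to share the same offset in the capability union */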
+         inc = cap->sym.cipher.key_size.increment;
+         inc = inc ? inc : 1;
+         for (len = cap->sym.cipher.key_size.min;
+              len <= cap->sym.cipher.key_size.max; len += inc)
+           {
+             alg = cipher_cap_to_alg (cap, len);
+             if (!alg)
+               continue;
+             dev->cipher_support[cipher_alg_index (alg)] = 1;
+             alg->resources += vec_len (dev->free_resources);
+	      /* Enable the engine once at least one algo has a resource
+	       * per thread */
+             dcm->enabled |= (alg->resources >= n_mains);
+           }
+         break;
+       case RTE_CRYPTO_SYM_XFORM_AUTH:
+         inc = cap->sym.auth.digest_size.increment;
+         inc = inc ? inc : 1;
+         for (len = cap->sym.auth.digest_size.min;
+              len <= cap->sym.auth.digest_size.max; len += inc)
+           {
+             alg = auth_cap_to_alg (cap, len);
+             if (!alg)
+               continue;
+             dev->auth_support[auth_alg_index (alg)] = 1;
+             alg->resources += vec_len (dev->free_resources);
+	      /* Enable the engine once at least one algo has a resource
+	       * per thread */
+             dcm->enabled |= (alg->resources >= n_mains);
+           }
+         break;
+       default:
+         ;
+       }
     }
+}
+
+#define DPDK_CRYPTO_N_QUEUE_DESC  2048
+#define DPDK_CRYPTO_NB_SESS_OBJS  20000
+
+static clib_error_t *
+crypto_dev_conf (u8 dev, u16 n_qp, u8 numa)
+{
+  struct rte_cryptodev_config dev_conf;
+  struct rte_cryptodev_qp_conf qp_conf;
+  i32 ret;
+  u16 qp;
+  i8 *error_str;
+
+  dev_conf.socket_id = numa;
+  dev_conf.nb_queue_pairs = n_qp;
+#if DPDK_NO_AEAD
+  dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS;
+  dev_conf.session_mp.cache_size = 512;
+#endif
+
+  error_str = "failed to configure crypto device %u";
+  ret = rte_cryptodev_configure (dev, &dev_conf);
+  if (ret < 0)
+    return clib_error_return (0, error_str, dev);
+
+  error_str = "failed to setup crypto device %u queue pair %u";
+  qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC;
+  for (qp = 0; qp < n_qp; qp++)
+    {
 #if DPDK_NO_AEAD
-  else if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE ||
-          sa->integ_alg == IPSEC_INTEG_ALG_NONE ||
-          sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)
+      ret = rte_cryptodev_queue_pair_setup (dev, qp, &qp_conf, numa);
 #else
-  else if (sa->integ_alg == IPSEC_INTEG_ALG_NONE)
+      ret = rte_cryptodev_queue_pair_setup (dev, qp, &qp_conf, numa, NULL);
 #endif
-    return clib_error_return (0,
-                             "unsupported integ-alg %U with crypto-alg %U",
-                             format_ipsec_integ_alg, sa->integ_alg,
-                             format_ipsec_crypto_alg, sa->crypto_alg);
+      if (ret < 0)
+       return clib_error_return (0, error_str, dev, qp);
+    }
 
   return 0;
 }
 
-static uword
-dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
-                   vlib_frame_t * f)
+static void
+crypto_scan_devs (u32 n_mains)
 {
-  ipsec_main_t *im = &ipsec_main;
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
-  struct rte_cryptodev_config dev_conf;
-  struct rte_cryptodev_qp_conf qp_conf;
-  struct rte_cryptodev_info cdev_info;
-  struct rte_mempool *rmp;
-  i32 dev_id, ret;
-  u32 i, skip_master;
-#if ! DPDK_NO_AEAD
-  u32 max_sess_size = 0, sess_size;
-  i8 socket_id;
+  struct rte_cryptodev *cryptodev;
+  struct rte_cryptodev_info info;
+  crypto_dev_t *dev;
+  crypto_resource_t *res;
+  clib_error_t *error;
+  u32 i;
+  u16 max_res_idx, res_idx, j;
+  u8 drv_id;
+
+  vec_validate_init_empty (dcm->dev, rte_cryptodev_count () - 1,
+                          (crypto_dev_t) EMPTY_STRUCT);
+
+  for (i = 0; i < rte_cryptodev_count (); i++)
+    {
+      dev = vec_elt_at_index (dcm->dev, i);
+
+      cryptodev = &rte_cryptodevs[i];
+      rte_cryptodev_info_get (i, &info);
+
+      dev->id = i;
+      dev->name = cryptodev->data->name;
+      dev->numa = rte_cryptodev_socket_id (i);
+      dev->features = info.feature_flags;
+      dev->max_qp = info.max_nb_queue_pairs;
+#if DPDK_NO_AEAD
+      drv_id = cryptodev->dev_type;
+#else
+      drv_id = info.driver_id;
 #endif
+      if (drv_id >= vec_len (dcm->drv))
+       vec_validate_init_empty (dcm->drv, drv_id,
+                                (crypto_drv_t) EMPTY_STRUCT);
+      vec_elt_at_index (dcm->drv, drv_id)->name = info.driver_name;
+      dev->drv_id = drv_id;
+      vec_add1 (vec_elt_at_index (dcm->drv, drv_id)->devs, i);
+
+      if (!(info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
+       continue;
 
-  if (check_cryptodev_queues () < 0)
-    {
-      clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec");
-      return 0;
-    }
-  dcm->enabled = 1;
+      if ((error = crypto_dev_conf (i, dev->max_qp, dev->numa)))
+       {
+         clib_error_report (error);
+         continue;
+       }
 
-  vec_alloc (dcm->workers_main, tm->n_vlib_mains);
-  _vec_len (dcm->workers_main) = tm->n_vlib_mains;
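+      /* One resource per queue-pair couple: the even qp handles inbound,
+       * the odd qp outbound */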
+      max_res_idx = (dev->max_qp / 2) - 1;
 
-  skip_master = vlib_num_workers () > 0;
+      vec_validate (dev->free_resources, max_res_idx);
 
-  fprintf (stdout, "DPDK Cryptodevs info:\n");
-  fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n");
-  /* HW cryptodevs have higher dev_id, use HW first */
-  for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--)
-    {
-      u16 max_nb_qp, qp = 0;
+      res_idx = vec_len (dcm->resource);
+      vec_validate_init_empty_aligned (dcm->resource, res_idx + max_res_idx,
+                                      (crypto_resource_t) EMPTY_STRUCT,
+                                      CLIB_CACHE_LINE_BYTES);
 
-      rte_cryptodev_info_get (dev_id, &cdev_info);
+      for (j = 0; j <= max_res_idx; j++, res_idx++)
+       {
+         vec_elt (dev->free_resources, max_res_idx - j) = res_idx;
+         res = &dcm->resource[res_idx];
+         res->dev_id = i;
+         res->drv_id = drv_id;
+         res->qp_id = j * 2;
+         res->numa = dev->numa;
+         res->thread_idx = (u16) ~ 0;
+       }
 
-      if (!
-         (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
-       continue;
+      crypto_parse_capabilities (dev, info.capabilities, n_mains);
+    }
+}
 
-      max_nb_qp = cdev_info.max_nb_queue_pairs;
+void
+crypto_auto_placement (void)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_resource_t *res;
+  crypto_worker_main_t *cwm;
+  crypto_dev_t *dev;
+  u32 thread_idx, skip_master;
+  u16 res_idx, *idx;
+  u8 used;
+  u16 i;
 
-      for (i = 0; i < tm->n_vlib_mains; i++)
+  skip_master = vlib_num_workers () > 0;
+
+  /* *INDENT-OFF* */
+  vec_foreach (dev, dcm->dev)
+    {
+      vec_foreach_index (thread_idx, dcm->workers_main)
        {
-         u8 is_outbound;
-         crypto_worker_main_t *cwm;
-         uword *map;
+         if (vec_len (dev->free_resources) == 0)
+           break;
+
+         if (thread_idx < skip_master)
+           continue;
+
+	  /* Skip the thread if it is already using this device */
+	  u8 dev_in_use = 0;
+	  vec_foreach (idx, dev->used_resources)
+	    if (dcm->resource[idx[0]].thread_idx == thread_idx)
+	      dev_in_use = 1;
+	  if (dev_in_use)
+	    continue;
 
-         if (skip_master)
+         cwm = vec_elt_at_index (dcm->workers_main, thread_idx);
+
+         used = 0;
+         res_idx = vec_pop (dev->free_resources);
+
+	  /* Assign this resource only to algos the device supports */
+         for (i = 0; i < IPSEC_CRYPTO_N_ALG; i++)
+           if (dev->cipher_support[i] &&
+               cwm->cipher_resource_idx[i] == (u16) ~0)
+             {
+               dcm->cipher_algs[i].disabled--;
+               cwm->cipher_resource_idx[i] = res_idx;
+               used = 1;
+             }
+
+         for (i = 0; i < IPSEC_INTEG_N_ALG; i++)
+           if (dev->auth_support[i] &&
+               cwm->auth_resource_idx[i] == (u16) ~0)
+             {
+               dcm->auth_algs[i].disabled--;
+               cwm->auth_resource_idx[i] = res_idx;
+               used = 1;
+             }
+
+         if (!used)
            {
-             skip_master = 0;
+             vec_add1 (dev->free_resources, res_idx);
              continue;
            }
 
-         cwm = vec_elt_at_index (dcm->workers_main, i);
-         map = cwm->algo_qp_map;
+         vec_add1 (dev->used_resources, res_idx);
 
-         if (!map)
-           {
-             map = hash_create (0, sizeof (crypto_worker_qp_key_t));
-             if (!map)
-               {
-                 clib_warning ("unable to create hash table for worker %u",
-                               vlib_mains[i]->thread_index);
-                 goto error;
-               }
-             cwm->algo_qp_map = map;
-           }
+         res = vec_elt_at_index (dcm->resource, res_idx);
+
+         ASSERT (res->thread_idx == (u16) ~0);
+         res->thread_idx = thread_idx;
 
-         for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp;
-              is_outbound++)
-           qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound);
+	  /* Add the resource to the worker's polling list */
+         vec_add1 (cwm->resource_idx, res_idx);
        }
+    }
+  /* *INDENT-ON* */
+}
 
-      if (qp == 0)
-       continue;
+static void
+crypto_op_init (struct rte_mempool *mempool,
+               void *_arg __attribute__ ((unused)),
+               void *_obj, unsigned i __attribute__ ((unused)))
+{
+  struct rte_crypto_op *op = _obj;
 
-      dev_conf.socket_id = rte_cryptodev_socket_id (dev_id);
-      dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs;
 #if DPDK_NO_AEAD
-      dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS;
-      dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE;
+  op->sym = (struct rte_crypto_sym_op *) (op + 1);
+  op->sym->sess_type = RTE_CRYPTO_SYM_OP_WITH_SESSION;
+#else
+  op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
 #endif
-      ret = rte_cryptodev_configure (dev_id, &dev_conf);
-      if (ret < 0)
-       {
-         clib_warning ("cryptodev %u config error", dev_id);
-         goto error;
-       }
+  op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+  op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+  op->phys_addr = rte_mem_virt2phy (_obj);
+  op->mempool = mempool;
+}
 
-      qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC;
-      for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++)
-       {
+static clib_error_t *
+crypto_create_crypto_op_pool (u8 numa)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  dpdk_config_main_t *conf = &dpdk_config_main;
+  crypto_data_t *data;
+  u8 *pool_name;
+  u32 pool_priv_size = sizeof (struct rte_crypto_op_pool_private);
+  struct rte_crypto_op_pool_private *priv;
+  clib_error_t *error = NULL;
+
+  data = vec_elt_at_index (dcm->data, numa);
+
+  if (data->crypto_op)
+    return NULL;
+
+  pool_name = format (0, "crypto_pool_numa%u%c", numa, 0);
+
+  data->crypto_op =
+    rte_mempool_create ((i8 *) pool_name, conf->num_mbufs, crypto_op_len (),
+                       512, pool_priv_size, NULL, NULL, crypto_op_init, NULL,
+                       numa, 0);
+
+  if (!data->crypto_op)
+    {
+      error = clib_error_return (0, "failed to allocate %s", pool_name);
+      goto done;
+    }
+
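+  /* Fill the private area the way rte_crypto_op_pool_create () would,
+   * so the pool is recognized as a symmetric crypto op pool */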
+  priv = rte_mempool_get_priv (data->crypto_op);
+
+  priv->priv_size = pool_priv_size;
+  priv->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+
+done:
+  vec_free (pool_name);
+
+  return error;
+}
+
+static clib_error_t *
+crypto_create_session_h_pool (u8 numa)
+{
 #if DPDK_NO_AEAD
-         ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf,
-                                               dev_conf.socket_id);
+  return NULL;
 #else
-         ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf,
-                                               dev_conf.socket_id, NULL);
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_data_t *data;
+  u8 *pool_name;
+  u32 elt_size;
+  clib_error_t *error = NULL;
+
+  data = vec_elt_at_index (dcm->data, numa);
+
+  if (data->session_h)
+    return NULL;
+
+  pool_name = format (0, "session_h_pool_numa%u%c", numa, 0);
+  elt_size = rte_cryptodev_get_header_session_size ();
+
+  data->session_h =
+    rte_mempool_create ((i8 *) pool_name, DPDK_CRYPTO_NB_SESS_OBJS, elt_size,
+                       512, 0, NULL, NULL, NULL, NULL, numa, 0);
+
+  if (!data->session_h)
+    error = clib_error_return (0, "failed to allocate %s", pool_name);
+
+  vec_free (pool_name);
+
+  return error;
 #endif
-         if (ret < 0)
-           {
-             clib_warning ("cryptodev %u qp %u setup error", dev_id, qp);
-             goto error;
-           }
-       }
-      vec_validate (dcm->cop_pools, dev_conf.socket_id);
+}
 
-#if ! DPDK_NO_AEAD
-      sess_size = rte_cryptodev_get_private_session_size (dev_id);
-      if (sess_size > max_sess_size)
-       max_sess_size = sess_size;
+static clib_error_t *
+crypto_create_session_drv_pool (crypto_dev_t * dev)
+{
+#if DPDK_NO_AEAD
+  return NULL;
+#else
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_data_t *data;
+  u8 *pool_name;
+  u32 elt_size;
+  clib_error_t *error = NULL;
+  u8 numa = dev->numa;
+
+  data = vec_elt_at_index (dcm->data, numa);
+
+  vec_validate (data->session_drv, dev->drv_id);
+  vec_validate (data->session_drv_failed, dev->drv_id);
+
+  if (data->session_drv[dev->drv_id])
+    return NULL;
+
+  pool_name = format (0, "session_drv%u_pool_numa%u%c", dev->drv_id, numa, 0);
+  elt_size = rte_cryptodev_get_private_session_size (dev->id);
+
+  data->session_drv[dev->drv_id] =
+    rte_mempool_create ((i8 *) pool_name, DPDK_CRYPTO_NB_SESS_OBJS, elt_size,
+                       512, 0, NULL, NULL, NULL, NULL, numa, 0);
+
+  if (!data->session_drv[dev->drv_id])
+    error = clib_error_return (0, "failed to allocate %s", pool_name);
+
+  vec_free (pool_name);
+
+  return error;
 #endif
+}
 
-      if (!vec_elt (dcm->cop_pools, dev_conf.socket_id))
-       {
-         u8 *pool_name = format (0, "crypto_op_pool_socket%u%c",
-                                 dev_conf.socket_id, 0);
-
-         rmp = rte_crypto_op_pool_create ((char *) pool_name,
-                                          RTE_CRYPTO_OP_TYPE_SYMMETRIC,
-                                          DPDK_CRYPTO_NB_COPS *
-                                          (1 + vlib_num_workers ()),
-                                          DPDK_CRYPTO_CACHE_SIZE,
-                                          DPDK_CRYPTO_PRIV_SIZE,
-                                          dev_conf.socket_id);
-
-         if (!rmp)
-           {
-             clib_warning ("failed to allocate %s", pool_name);
-             vec_free (pool_name);
-             goto error;
-           }
-         vec_free (pool_name);
-         vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp;
-       }
+static clib_error_t *
+crypto_create_pools (void)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  clib_error_t *error = NULL;
+  crypto_dev_t *dev;
+
+  /* *INDENT-OFF* */
+  vec_foreach (dev, dcm->dev)
+    {
+      vec_validate (dcm->data, dev->numa);
+
+      error = crypto_create_crypto_op_pool (dev->numa);
+      if (error)
+       return error;
+
+      error = crypto_create_session_h_pool (dev->numa);
+      if (error)
+       return error;
 
-      fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs,
-              DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE);
+      error = crypto_create_session_drv_pool (dev);
+      if (error)
+       return error;
     }
+  /* *INDENT-ON* */
+
+  return NULL;
+}
+
+static void
+crypto_disable (void)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_data_t *data;
+  u8 i;
+
+  dcm->enabled = 0;
 
-#if ! DPDK_NO_AEAD
   /* *INDENT-OFF* */
-  vec_foreach_index (socket_id, dcm->cop_pools)
+  vec_foreach (data, dcm->data)
     {
-      u8 *pool_name;
+      rte_mempool_free (data->crypto_op);
+      rte_mempool_free (data->session_h);
 
-      if (!vec_elt (dcm->cop_pools, socket_id))
-       continue;
+      vec_foreach_index (i, data->session_drv)
+       rte_mempool_free (data->session_drv[i]);
 
-      vec_validate (dcm->sess_h_pools, socket_id);
-      pool_name = format (0, "crypto_sess_h_socket%u%c",
-                             socket_id, 0);
-      rmp =
-       rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS,
-                          rte_cryptodev_get_header_session_size (),
-                          512, 0, NULL, NULL, NULL, NULL,
-                          socket_id, 0);
-      if (!rmp)
-       {
-         clib_warning ("failed to allocate %s", pool_name);
-         vec_free (pool_name);
-         goto error;
-       }
-      vec_free (pool_name);
-      vec_elt (dcm->sess_h_pools, socket_id) = rmp;
-
-      vec_validate (dcm->sess_pools, socket_id);
-      pool_name = format (0, "crypto_sess_socket%u%c",
-                             socket_id, 0);
-      rmp =
-       rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS,
-                          max_sess_size, 512, 0, NULL, NULL, NULL, NULL,
-                          socket_id, 0);
-      if (!rmp)
-       {
-         clib_warning ("failed to allocate %s", pool_name);
-         vec_free (pool_name);
-         goto error;
-       }
-      vec_free (pool_name);
-      vec_elt (dcm->sess_pools, socket_id) = rmp;
+      vec_free (data->session_drv);
     }
   /* *INDENT-ON* */
-#endif
 
-  dpdk_esp_init ();
+  vec_free (dcm->data);
 
-  /* Add new next node and set as default */
+  vec_free (dcm->workers_main);
+  vec_free (dcm->sa_session);
+  vec_free (dcm->dev);
+  vec_free (dcm->resource);
+  vec_free (dcm->cipher_algs);
+  vec_free (dcm->auth_algs);
+}
+
+static uword
+dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+                   vlib_frame_t * f)
+{
+  ipsec_main_t *im = &ipsec_main;
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  crypto_worker_main_t *cwm;
+  clib_error_t *error = NULL;
+  u32 i, skip_master, n_mains;
+
+  n_mains = tm->n_vlib_mains;
+  skip_master = vlib_num_workers () > 0;
+
+  algos_init (n_mains - skip_master);
+
+  crypto_scan_devs (n_mains - skip_master);
+
+  if (!(dcm->enabled))
+    {
+      clib_warning ("not enough DPDK crypto resources, default to OpenSSL");
+      crypto_disable ();
+      return 0;
+    }
+
+  vec_validate_init_empty (dcm->workers_main, n_mains - 1,
+                          (crypto_worker_main_t) EMPTY_STRUCT);
+
+  /* *INDENT-OFF* */
+  vec_foreach (cwm, dcm->workers_main)
+    {
+      memset (cwm->cipher_resource_idx, ~0,
+             IPSEC_CRYPTO_N_ALG * sizeof(*cwm->cipher_resource_idx));
+      memset (cwm->auth_resource_idx, ~0,
+             IPSEC_INTEG_N_ALG * sizeof(*cwm->auth_resource_idx));
+    }
+  /* *INDENT-ON* */
+
+  crypto_auto_placement ();
+
+  error = crypto_create_pools ();
+  if (error)
+    {
+      clib_error_report (error);
+      crypto_disable ();
+      return 0;
+    }
+
+  /* Add new next node and set it as default */
   vlib_node_t *node, *next_node;
 
   next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt");
@@ -459,33 +1091,12 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
     vlib_node_add_next (vm, node->index, next_node->index);
 
   im->cb.check_support_cb = dpdk_ipsec_check_support;
-  im->cb.add_del_sa_sess_cb = add_del_sa_sess;
-
-  for (i = skip_master; i < tm->n_vlib_mains; i++)
-    vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index,
-                        VLIB_NODE_STATE_POLLING);
-
-  /* TODO cryptodev counters */
-
-  return 0;
-
-error:
-  ;
-  crypto_worker_main_t *cwm;
-  struct rte_mempool **mp;
-  /* *INDENT-OFF* */
-  vec_foreach (cwm, dcm->workers_main)
-    hash_free (cwm->algo_qp_map);
-
-  vec_foreach (mp, dcm->cop_pools)
-    {
-      if (mp)
-       rte_mempool_free (mp[0]);
-    }
-  /* *INDENT-ON* */
-  vec_free (dcm->workers_main);
-  vec_free (dcm->cop_pools);
+  im->cb.add_del_sa_sess_cb = add_del_sa_session;
 
+  node = vlib_get_node_by_name (vm, (u8 *) "dpdk-crypto-input");
+  ASSERT (node);
+  for (i = skip_master; i < n_mains; i++)
+    vlib_node_set_state (vlib_mains[i], node->index, VLIB_NODE_STATE_POLLING);
   return 0;
 }
 
index a94dd68..98e5ad5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Intel and/or its affiliates.
+ * Copyright (c) 2017 Intel and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
@@ -16,6 +16,8 @@
 #define __DPDK_IPSEC_H__
 
 #include <vnet/vnet.h>
+#include <vppinfra/cache.h>
+#include <vnet/ipsec/ipsec.h>
 
 #undef always_inline
 #include <rte_config.h>
 #define always_inline static inline __attribute__ ((__always_inline__))
 #endif
 
+#define foreach_dpdk_crypto_input_next         \
+  _(DROP, "error-drop")                                \
+  _(IP4_LOOKUP, "ip4-lookup")                   \
+  _(IP6_LOOKUP, "ip6-lookup")                   \
+  _(INTERFACE_OUTPUT, "interface-output")      \
+  _(DECRYPT_POST, "dpdk-esp-decrypt-post")
+
+typedef enum
+{
+#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f,
+  foreach_dpdk_crypto_input_next
+#undef _
+    DPDK_CRYPTO_INPUT_N_NEXT,
+} dpdk_crypto_input_next_t;
 
 #define MAX_QP_PER_LCORE 16
 
@@ -41,196 +57,334 @@ typedef struct
 typedef struct
 {
   dpdk_gcm_cnt_blk cb;
-  union
-  {
-    u8 aad[12];
-    u8 icv[64];
-  };
-} dpdk_cop_priv_t;
+  u8 aad[12];
+  u32 next;
+  u8 icv[32];
+} dpdk_op_priv_t __attribute__ ((aligned (16)));
 
 typedef struct
 {
-  u8 cipher_algo;
-  u8 auth_algo;
-  u8 is_outbound;
-  u8 is_aead;
-} crypto_worker_qp_key_t;
+  u16 *resource_idx;
+  uword *session_by_drv_id_and_sa_index;
+  u16 cipher_resource_idx[IPSEC_CRYPTO_N_ALG];
+  u16 auth_resource_idx[IPSEC_INTEG_N_ALG];
+  struct rte_crypto_op *ops[VLIB_FRAME_SIZE];
+} crypto_worker_main_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));
 
 typedef struct
 {
-  u16 dev_id;
-  u16 qp_id;
-  u16 is_outbound;
-  i16 inflights;
-  u32 bi[VLIB_FRAME_SIZE];
-  struct rte_crypto_op *cops[VLIB_FRAME_SIZE];
-  struct rte_crypto_op **free_cops;
-} crypto_qp_data_t;
+  char *name;
+  enum rte_crypto_sym_xform_type type;
+  u32 alg;
+  u8 key_len;
+  u8 iv_len;
+  u8 trunc_size;
+  u8 boundary;
+  u8 disabled;
+  u8 resources;
+} crypto_alg_t __attribute__ ((aligned (8)));
 
 typedef struct
 {
-  u8 qp_index;
-  void *sess;
-} crypto_sa_session_t;
+  u16 *free_resources;
+  u16 *used_resources;
+  u8 cipher_support[IPSEC_CRYPTO_N_ALG];
+  u8 auth_support[IPSEC_INTEG_N_ALG];
+  u8 drv_id;
+  u8 numa;
+  u16 id;
+  const i8 *name;
+  u32 max_qp;
+  u64 features;
+} crypto_dev_t;
+
+typedef struct
+{
+  const i8 *name;
+  u16 *devs;
+} crypto_drv_t;
+
+typedef struct
+{
+  u16 thread_idx;
+  u8 remove;
+  u8 drv_id;
+  u8 dev_id;
+  u8 numa;
+  u16 qp_id;
+  u16 inflights[2];
+  u16 n_ops;
+  u16 __unused;
+  struct rte_crypto_op *ops[VLIB_FRAME_SIZE];
+  u32 bi[VLIB_FRAME_SIZE];
+} crypto_resource_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));
 
 typedef struct
 {
-  crypto_sa_session_t *sa_sess_d[2];
-  crypto_qp_data_t *qp_data;
-  uword *algo_qp_map;
-} crypto_worker_main_t;
+  struct rte_mempool *crypto_op;
+  struct rte_mempool *session_h;
+  struct rte_mempool **session_drv;
+  uword *session_by_sa_index;
+  u64 crypto_op_get_failed;
+  u64 session_h_failed;
+  u64 *session_drv_failed;
+} crypto_data_t;
 
 typedef struct
 {
-  struct rte_mempool **sess_h_pools;
-  struct rte_mempool **sess_pools;
-  struct rte_mempool **cop_pools;
   crypto_worker_main_t *workers_main;
+  struct rte_cryptodev_sym_session **sa_session;
+  crypto_dev_t *dev;
+  crypto_resource_t *resource;
+  crypto_alg_t *cipher_algs;
+  crypto_alg_t *auth_algs;
+  crypto_data_t *data;
+  crypto_drv_t *drv;
+  u8 max_drv_id;
   u8 enabled;
 } dpdk_crypto_main_t;
 
 dpdk_crypto_main_t dpdk_crypto_main;
 
-extern vlib_node_registration_t dpdk_crypto_input_node;
+static const u8 pad_data[] =
+  { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0 };
 
-#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3)
+void crypto_auto_placement (void);
 
-static_always_inline void
-crypto_alloc_cops ()
+clib_error_t *create_sym_session (struct rte_cryptodev_sym_session **session,
+                                 u32 sa_idx, crypto_resource_t * res,
+                                 crypto_worker_main_t * cwm, u8 is_outbound);
+
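+/*
+ * Mempool element layout (sketch):
+ *   [rte_crypto_op][rte_crypto_sym_op][pad to 16B][dpdk_op_priv_t]
+ * crypto_op_len () is the total element size; crypto_op_get_priv_offset ()
+ * locates the private area behind the sym op.
+ */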
+static_always_inline u32
+crypto_op_len (void)
+{
+  const u32 align = 16;
+  u32 op_size =
+    sizeof (struct rte_crypto_op) + sizeof (struct rte_crypto_sym_op);
+
+  return ((op_size + align - 1) & ~(align - 1)) + sizeof (dpdk_op_priv_t);
+}
+
+static_always_inline u32
+crypto_op_get_priv_offset (void)
+{
+  const u32 align = 16;
+  u32 offset;
+
+  offset = sizeof (struct rte_crypto_op) + sizeof (struct rte_crypto_sym_op);
+  offset = (offset + align - 1) & ~(align - 1);
+
+  return offset;
+}
+
+static_always_inline dpdk_op_priv_t *
+crypto_op_get_priv (struct rte_crypto_op * op)
+{
+  return (dpdk_op_priv_t *) (((u8 *) op) + crypto_op_get_priv_offset ());
+}
+
+/* XXX requires a 64-bit build so the hash_xxx macros can take the u64 key */
+typedef union
+{
+  u64 val;
+  struct
+  {
+    u32 drv_id;
+    u32 sa_idx;
+  };
+} crypto_session_key_t;
+
+static_always_inline clib_error_t *
+crypto_get_session (struct rte_cryptodev_sym_session **session,
+                   u32 sa_idx,
+                   crypto_resource_t * res,
+                   crypto_worker_main_t * cwm, u8 is_outbound)
+{
+  crypto_session_key_t key = { 0 };
+
+  key.drv_id = res->drv_id;
+  key.sa_idx = sa_idx;
+
+  uword *val = hash_get (cwm->session_by_drv_id_and_sa_index, key.val);
+
+  if (PREDICT_FALSE (!val))
+    return create_sym_session (session, sa_idx, res, cwm, is_outbound);
+
+  session[0] = (struct rte_cryptodev_sym_session *) val[0];
+
+  return NULL;
+}
+
+static_always_inline u16
+get_resource (crypto_worker_main_t * cwm, ipsec_sa_t * sa)
+{
+  u16 cipher_res = cwm->cipher_resource_idx[sa->crypto_alg];
+  u16 auth_res = cwm->auth_resource_idx[sa->integ_alg];
+  u8 is_aead;
+
+  /* SA setup disallows a non-AEAD cipher with NULL auth, and NULL/NULL */
+
+  is_aead = ((sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) |
+            (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) |
+            (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256));
+
+  if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE)
+    return auth_res;
+
+  if (cipher_res == auth_res)
+    return cipher_res;
+
+  if (is_aead)
+    return cipher_res;
+
+  return (u16) ~ 0;
+}
+
+static_always_inline i32
+crypto_alloc_ops (u8 numa, struct rte_crypto_op ** ops, u32 n)
 {
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  u32 thread_index = vlib_get_thread_index ();
-  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
-  unsigned socket_id = rte_socket_id ();
-  crypto_qp_data_t *qpd;
+  crypto_data_t *data = vec_elt_at_index (dcm->data, numa);
+  i32 ret;
 
-  /* *INDENT-OFF* */
-  vec_foreach (qpd, cwm->qp_data)
-    {
-      u32 l = vec_len (qpd->free_cops);
+  ret = rte_mempool_get_bulk (data->crypto_op, (void **) ops, n);
 
-      if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
-       {
-         u32 n_alloc;
+  data->crypto_op_get_failed += ! !ret;
 
-         if (PREDICT_FALSE (!qpd->free_cops))
-           vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS);
+  return ret;
+}
 
-         n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id],
-                                             RTE_CRYPTO_OP_TYPE_SYMMETRIC,
-                                             &qpd->free_cops[l],
-                                             CRYPTO_N_FREE_COPS - l - 1);
+static_always_inline void
+crypto_free_ops (u8 numa, struct rte_crypto_op **ops, u32 n)
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_data_t *data = vec_elt_at_index (dcm->data, numa);
 
-         _vec_len (qpd->free_cops) = l + n_alloc;
-       }
-    }
-  /* *INDENT-ON* */
+  if (!n)
+    return;
+
+  rte_mempool_put_bulk (data->crypto_op, (void **) ops, n);
 }
 
 static_always_inline void
-crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n)
+crypto_enqueue_ops (vlib_main_t * vm, crypto_worker_main_t * cwm, u8 outbound,
+                   u32 node_index, u32 error, u8 numa)
 {
-  u32 l = vec_len (qpd->free_cops);
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  crypto_resource_t *res;
+  u16 *res_idx;
 
-  if (l + n >= CRYPTO_N_FREE_COPS)
+  /* *INDENT-OFF* */
+  vec_foreach (res_idx, cwm->resource_idx)
     {
-      l -= VLIB_FRAME_SIZE;
-      rte_mempool_put_bulk (cops[0]->mempool,
-                           (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE);
-    }
-  clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n);
+      u16 enq;
+      res = vec_elt_at_index (dcm->resource, res_idx[0]);
+
+      if (!res->n_ops)
+       continue;
+
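+      /* qp_id is even; +outbound selects the direction's queue pair
+       * (even = inbound, odd = outbound) */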
+      enq = rte_cryptodev_enqueue_burst (res->dev_id, res->qp_id + outbound,
+                                        res->ops, res->n_ops);
+      res->inflights[outbound] += enq;
+
+      if (PREDICT_FALSE (enq < res->n_ops))
+       {
+         crypto_free_ops (numa, &res->ops[enq], res->n_ops - enq);
+         vlib_buffer_free (vm, &res->bi[enq], res->n_ops - enq);
 
-  _vec_len (qpd->free_cops) = l + n;
+          vlib_node_increment_counter (vm, node_index, error,
+                                      res->n_ops - enq);
+        }
+      res->n_ops = 0;
+    }
+  /* *INDENT-ON* */
 }
 
-static_always_inline int
-check_algo_is_supported (const struct rte_cryptodev_capabilities *cap,
-                        char *name)
+static_always_inline void
+crypto_set_icb (dpdk_gcm_cnt_blk * icb, u32 salt, u32 seq, u32 seq_hi)
 {
-  struct
-  {
-    enum rte_crypto_sym_xform_type type;
-    union
-    {
-      enum rte_crypto_auth_algorithm auth;
-      enum rte_crypto_cipher_algorithm cipher;
-#if ! DPDK_NO_AEAD
-      enum rte_crypto_aead_algorithm aead;
-#endif
-    };
-    char *name;
-  } supported_algo[] =
-  {
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
-       RTE_CRYPTO_CIPHER_NULL,.name = "NULL"},
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
-       RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"},
+  icb->salt = salt;
+  icb->iv[0] = seq;
+  icb->iv[1] = seq_hi;
 #if DPDK_NO_AEAD
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
-       RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"},
-#else
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AEAD,.aead =
-       RTE_CRYPTO_AEAD_AES_GCM,.name = "AES-GCM"},
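+  /* Older DPDK consumes the full counter block; the GCM block counter
+   * starts at 1 */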
+  icb->cnt = clib_host_to_net_u32 (1);
 #endif
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_NULL,.name = "NULL"},
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"},
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"},
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"},
-    {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"},
+}
+
+#define __unused __attribute__((unused))
+static_always_inline void
+crypto_op_setup (u8 is_aead, struct rte_mbuf *mb0,
+                struct rte_crypto_op *op, void *session,
+                u32 cipher_off, u32 cipher_len,
+                u8 * icb __unused, u32 iv_size __unused,
+                u32 auth_off, u32 auth_len,
+                u8 * aad __unused, u32 aad_size __unused,
+                u8 * digest, u64 digest_paddr, u32 digest_size __unused)
+{
+  struct rte_crypto_sym_op *sym_op;
+
+  sym_op = (struct rte_crypto_sym_op *) (op + 1);
+
+  sym_op->m_src = mb0;
+  sym_op->session = session;
+
+  if (!digest_paddr)
+    digest_paddr = mb0->buf_physaddr + ((u8 *) digest) - ((u8 *) mb0);
+
 #if DPDK_NO_AEAD
+  sym_op->cipher.data.offset = cipher_off;
+  sym_op->cipher.data.length = cipher_len;
+
+  sym_op->cipher.iv.data = icb;
+  sym_op->cipher.iv.phys_addr =
+    op->phys_addr + (uintptr_t) icb - (uintptr_t) op;
+  sym_op->cipher.iv.length = iv_size;
+
+  if (is_aead)
     {
-    .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
-       RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"},
-#endif
+      sym_op->auth.aad.data = aad;
+      sym_op->auth.aad.phys_addr =
+       op->phys_addr + (uintptr_t) aad - (uintptr_t) op;
+      sym_op->auth.aad.length = aad_size;
+    }
+  else
     {
-      /* tail */
-    .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED}
-  };
+      sym_op->auth.data.offset = auth_off;
+      sym_op->auth.data.length = auth_len;
+    }
 
-  uint32_t i = 0;
+  sym_op->auth.digest.data = digest;
+  sym_op->auth.digest.phys_addr = digest_paddr;
+  sym_op->auth.digest.length = digest_size;
+#else /* ! DPDK_NO_AEAD */
+  if (is_aead)
+    {
+      sym_op->aead.data.offset = cipher_off;
+      sym_op->aead.data.length = cipher_len;
 
-  if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
-    return -1;
+      sym_op->aead.aad.data = aad;
+      sym_op->aead.aad.phys_addr =
+       op->phys_addr + (uintptr_t) aad - (uintptr_t) op;
 
-  while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED)
-    {
-      if (cap->sym.xform_type == supported_algo[i].type)
-       {
-         if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
-              cap->sym.cipher.algo == supported_algo[i].cipher) ||
-#if ! DPDK_NO_AEAD
-             (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD &&
-              cap->sym.aead.algo == supported_algo[i].aead) ||
-#endif
-             (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH &&
-              cap->sym.auth.algo == supported_algo[i].auth))
-           {
-             if (name)
-               strcpy (name, supported_algo[i].name);
-             return 0;
-           }
-       }
-
-      i++;
+      sym_op->aead.digest.data = digest;
+      sym_op->aead.digest.phys_addr = digest_paddr;
     }
+  else
+    {
+      sym_op->cipher.data.offset = cipher_off;
+      sym_op->cipher.data.length = cipher_len;
 
-  return -1;
+      sym_op->auth.data.offset = auth_off;
+      sym_op->auth.data.length = auth_len;
+
+      sym_op->auth.digest.data = digest;
+      sym_op->auth.digest.phys_addr = digest_paddr;
+    }
+#endif /* DPDK_NO_AEAD */
 }
 
+#undef __unused
+
 #endif /* __DPDK_IPSEC_H__ */
 
 /*
index aa3ada1..36d16c0 100644 (file)
@@ -398,6 +398,7 @@ libvnet_la_SOURCES +=                               \
  vnet/ipsec/ipsec_if.c                         \
  vnet/ipsec/ipsec_if_in.c                      \
  vnet/ipsec/ipsec_if_out.c                     \
+ vnet/ipsec/esp_format.c                       \
  vnet/ipsec/esp_encrypt.c                      \
  vnet/ipsec/esp_decrypt.c                      \
  vnet/ipsec/ikev2.c                            \
index 799003b..98db701 100644 (file)
@@ -15,6 +15,9 @@
 #ifndef __ESP_H__
 #define __ESP_H__
 
+#include <vnet/ip/ip.h>
+#include <vnet/ipsec/ipsec.h>
+
 #include <openssl/hmac.h>
 #include <openssl/rand.h>
 #include <openssl/evp.h>
@@ -82,6 +85,7 @@ esp_main_t esp_main;
 #define ESP_WINDOW_SIZE                (64)
 #define ESP_SEQ_MAX            (4294967295UL)
 
+u8 *format_esp_header (u8 * s, va_list * args);
 
 always_inline int
 esp_replay_check (ipsec_sa_t * sa, u32 seq)
diff --git a/src/vnet/ipsec/esp_format.c b/src/vnet/ipsec/esp_format.c
new file mode 100644 (file)
index 0000000..615d576
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * esp_format.c : ESP format
+ *
+ * Copyright (c) 2017 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/api_errno.h>
+#include <vnet/ipsec/esp.h>
+
+u8 *
+format_esp_header (u8 * s, va_list * args)
+{
+  esp_header_t *esp = va_arg (*args, esp_header_t *);
+
+  s = format (s, "ESP: spi %u, seq %u",
+             clib_net_to_host_u32 (esp->spi),
+             clib_net_to_host_u32 (esp->seq));
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index cfe434a..ba0d68b 100644 (file)
@@ -434,7 +434,7 @@ ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add)
        }
       hash_unset (im->sa_index_by_sa_id, sa->id);
       if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0)
+         im->cb.add_del_sa_sess_cb (sa_index, 0) < 0)
        return VNET_API_ERROR_SYSCALL_ERROR_1;
       pool_put (im->sad, sa);
     }
@@ -445,7 +445,7 @@ ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add)
       sa_index = sa - im->sad;
       hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
       if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0)
+         im->cb.add_del_sa_sess_cb (sa_index, 1) < 0)
        return VNET_API_ERROR_SYSCALL_ERROR_1;
     }
   return 0;
@@ -482,7 +482,7 @@ ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update)
       sa->integ_key_len = sa_update->integ_key_len;
     }
 
-  if (sa->crypto_key_len + sa->integ_key_len > 0)
+  if (0 < sa_update->crypto_key_len || 0 < sa_update->integ_key_len)
     {
       if (im->cb.add_del_sa_sess_cb &&
          im->cb.add_del_sa_sess_cb (sa_index, 0) < 0)
@@ -516,8 +516,6 @@ ipsec_check_support (ipsec_sa_t * sa)
     return clib_error_return (0, "unsupported aes-gcm-128 crypto-alg");
   if (sa->integ_alg == IPSEC_INTEG_ALG_NONE)
     return clib_error_return (0, "unsupported none integ-alg");
-  if (sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)
-    return clib_error_return (0, "unsupported aes-gcm-128 integ-alg");
 
   return 0;
 }
index 1eff1c3..f11bc56 100644 (file)
@@ -15,6 +15,9 @@
 #ifndef __IPSEC_H__
 #define __IPSEC_H__
 
+#include <vnet/ip/ip.h>
+#include <vnet/feature/feature.h>
+
 #define IPSEC_FLAG_IPSEC_GRE_TUNNEL (1 << 0)
 
 
@@ -63,7 +66,12 @@ typedef enum
   _(1, AES_CBC_128, "aes-cbc-128")  \
   _(2, AES_CBC_192, "aes-cbc-192")  \
   _(3, AES_CBC_256, "aes-cbc-256")  \
-  _(4, AES_GCM_128, "aes-gcm-128")
+  _(4, AES_CTR_128, "aes-ctr-128")  \
+  _(5, AES_CTR_192, "aes-ctr-192")  \
+  _(6, AES_CTR_256, "aes-ctr-256")  \
+  _(7, AES_GCM_128, "aes-gcm-128")  \
+  _(8, AES_GCM_192, "aes-gcm-192")  \
+  _(9, AES_GCM_256, "aes-gcm-256")
 
 typedef enum
 {
@@ -80,8 +88,7 @@ typedef enum
   _(3, SHA_256_96, "sha-256-96")   /* draft-ietf-ipsec-ciph-sha-256-00 */ \
   _(4, SHA_256_128, "sha-256-128") /* RFC4868 */                          \
   _(5, SHA_384_192, "sha-384-192") /* RFC4868 */                          \
-  _(6, SHA_512_256, "sha-512-256") /* RFC4868 */                          \
-  _(7, AES_GCM_128, "aes-gcm-128")     /* RFC4106 */
+  _(6, SHA_512_256, "sha-512-256")     /* RFC4868 */
 
 typedef enum
 {
@@ -236,7 +243,7 @@ typedef struct
 
 typedef struct
 {
-  i32 (*add_del_sa_sess_cb) (u32 sa_index, u8 is_add);
+  clib_error_t *(*add_del_sa_sess_cb) (u32 sa_index, u8 is_add);
   clib_error_t *(*check_support_cb) (ipsec_sa_t * sa);
 } ipsec_main_callbacks_t;
 
index 9359a3b..9745534 100644 (file)
@@ -49,25 +49,63 @@ ipsec_admin_up_down_function (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
   ipsec_sa_t *sa;
 
   hi = vnet_get_hw_interface (vnm, hw_if_index);
+  t = pool_elt_at_index (im->tunnel_interfaces, hi->hw_instance);
+
   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
     {
-      t = pool_elt_at_index (im->tunnel_interfaces, hi->hw_instance);
       ASSERT (im->cb.check_support_cb);
+
       sa = pool_elt_at_index (im->sad, t->input_sa_index);
+
       err = im->cb.check_support_cb (sa);
       if (err)
        return err;
 
+      if (im->cb.add_del_sa_sess_cb)
+       {
+         err = im->cb.add_del_sa_sess_cb (t->input_sa_index, 1);
+         if (err)
+           return err;
+       }
+
       sa = pool_elt_at_index (im->sad, t->output_sa_index);
+
       err = im->cb.check_support_cb (sa);
       if (err)
        return err;
 
+      if (im->cb.add_del_sa_sess_cb)
+       {
+         err = im->cb.add_del_sa_sess_cb (t->output_sa_index, 1);
+         if (err)
+           return err;
+       }
+
       vnet_hw_interface_set_flags (vnm, hw_if_index,
                                   VNET_HW_INTERFACE_FLAG_LINK_UP);
     }
   else
-    vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
+    {
+      vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
+
+      sa = pool_elt_at_index (im->sad, t->input_sa_index);
+
+      if (im->cb.add_del_sa_sess_cb)
+       {
+         err = im->cb.add_del_sa_sess_cb (t->input_sa_index, 0);
+         if (err)
+           return err;
+       }
+
+      sa = pool_elt_at_index (im->sad, t->output_sa_index);
+
+      if (im->cb.add_del_sa_sess_cb)
+       {
+         err = im->cb.add_del_sa_sess_cb (t->output_sa_index, 0);
+         if (err)
+           return err;
+       }
+    }
 
   return /* no error */ 0;
 }
@@ -157,10 +195,6 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
                       args->remote_crypto_key_len);
        }
 
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
-
       pool_get (im->sad, sa);
       memset (sa, 0, sizeof (*sa));
       t->output_sa_index = sa - im->sad;
@@ -168,7 +202,6 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
       sa->tunnel_src_addr.ip4.as_u32 = args->local_ip.as_u32;
       sa->tunnel_dst_addr.ip4.as_u32 = args->remote_ip.as_u32;
       sa->is_tunnel = 1;
-      sa->seq = 1;
       sa->use_esn = args->esn;
       sa->use_anti_replay = args->anti_replay;
       sa->integ_alg = args->integ_alg;
@@ -186,10 +219,6 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
                       args->local_crypto_key_len);
        }
 
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
-
       hash_set (im->ipsec_if_pool_index_by_key, key,
                t - im->tunnel_interfaces);
 
@@ -242,18 +271,10 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
       /* delete input and output SA */
       sa = pool_elt_at_index (im->sad, t->input_sa_index);
 
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
-
       pool_put (im->sad, sa);
 
       sa = pool_elt_at_index (im->sad, t->output_sa_index);
 
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
-
       pool_put (im->sad, sa);
 
       hash_unset (im->ipsec_if_pool_index_by_key, key);
@@ -340,16 +361,15 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
   hi = vnet_get_hw_interface (vnm, hw_if_index);
   t = pool_elt_at_index (im->tunnel_interfaces, hi->dev_instance);
 
+  if (hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+    return VNET_API_ERROR_SYSCALL_ERROR_1;
+
   if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO)
     {
       sa = pool_elt_at_index (im->sad, t->output_sa_index);
       sa->crypto_alg = alg;
       sa->crypto_key_len = vec_len (key);
       clib_memcpy (sa->crypto_key, key, vec_len (key));
-
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
     }
   else if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG)
     {
@@ -357,10 +377,6 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
       sa->integ_alg = alg;
       sa->integ_key_len = vec_len (key);
       clib_memcpy (sa->integ_key, key, vec_len (key));
-
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
     }
   else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO)
     {
@@ -368,10 +384,6 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
       sa->crypto_alg = alg;
       sa->crypto_key_len = vec_len (key);
       clib_memcpy (sa->crypto_key, key, vec_len (key));
-
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
     }
   else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG)
     {
@@ -379,10 +391,6 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index,
       sa->integ_alg = alg;
       sa->integ_key_len = vec_len (key);
       clib_memcpy (sa->integ_key, key, vec_len (key));
-
-      if (im->cb.add_del_sa_sess_cb &&
-         im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0)
-       return VNET_API_ERROR_SYSCALL_ERROR_1;
     }
   else
     return VNET_API_ERROR_INVALID_VALUE;