From b9894ee69f6ba9c6a8eeb531d528e656b2a07812 Mon Sep 17 00:00:00 2001
From: Kingwel Xie
Date: Sat, 22 Dec 2018 03:17:15 -0500
Subject: [PATCH] crypto-input improvement: 1. multi-loop, and new style with
 vlib_buffer_enqueue_to_next 2. add error counter for AUTH-FAILURE 3. buffer
 trace changed. now it supports 'trace add dpdk-crypto-input 10' just like
 the other input nodes

Actual measurement shows >10 clocks per packets are saved, under QAT or
openssl PMD case

Change-Id: I6ea34e4ae3b08c381219ff6bc8adda2d927fbfd5
Signed-off-by: Kingwel Xie
---
 src/plugins/dpdk/ipsec/crypto_node.c | 237 ++++++++++++++++++++++++-----------
 1 file changed, 163 insertions(+), 74 deletions(-)

diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c
index 5fbaaf3316a..a5f8d8e805e 100644
--- a/src/plugins/dpdk/ipsec/crypto_node.c
+++ b/src/plugins/dpdk/ipsec/crypto_node.c
@@ -28,6 +28,7 @@
 #define foreach_dpdk_crypto_input_error \
   _(DQ_COPS, "Crypto ops dequeued") \
+  _(AUTH_FAILED, "Crypto verification failed") \
   _(STATUS, "Crypto operation failed")
 
 typedef enum
@@ -88,90 +89,178 @@ format_dpdk_crypto_input_trace (u8 * s, va_list * args)
   return s;
 }
 
+static_always_inline void
+dpdk_crypto_check_check_op (vlib_main_t * vm, vlib_node_runtime_t * node,
+                            struct rte_crypto_op *op0, u16 * next)
+{
+  if (PREDICT_FALSE (op0->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+    {
+      next[0] = DPDK_CRYPTO_INPUT_NEXT_DROP;
+      vlib_node_increment_counter (vm,
+                                   node->node_index,
+                                   DPDK_CRYPTO_INPUT_ERROR_STATUS, 1);
+      /* if auth failed */
+      if (op0->status == RTE_CRYPTO_OP_STATUS_AUTH_FAILED)
+        vlib_node_increment_counter (vm,
+                                     node->node_index,
+                                     DPDK_CRYPTO_INPUT_ERROR_AUTH_FAILED, 1);
+    }
+}
+
+always_inline void
+dpdk_crypto_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
+                         struct rte_crypto_op **ops, u32 n_deq)
+{
+  u32 n_left, n_trace;
+
+  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+    {
+      n_left = n_deq;
+
+      while (n_trace && n_left)
+        {
+          vlib_buffer_t *b0;
+          struct rte_crypto_op *op0;
+          u16 next;
+
+          op0 = ops[0];
+
+          next = crypto_op_get_priv (op0)->next;
+
+          b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src);
+
+          vlib_trace_buffer (vm, node, next, b0, /* follow_chain */ 0);
+
+          dpdk_crypto_input_trace_t *tr =
+            vlib_add_trace (vm, node, b0, sizeof (*tr));
+          tr->status = op0->status;
+
+          n_trace--;
+          n_left--;
+          ops++;
+        }
+      vlib_set_trace_count (vm, node, n_trace);
+    }
+}
+
 static_always_inline u32
 dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
                      crypto_resource_t * res, u8 outbound)
 {
-  u32 n_deq, total_n_deq = 0, *to_next = 0, n_ops, next_index;
   u32 thread_idx = vlib_get_thread_index ();
-  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   u8 numa = rte_socket_id ();
+
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, thread_idx);
+
+  u32 n_ops, n_deq;
+  u32 bis[VLIB_FRAME_SIZE], *bi;
+  u16 nexts[VLIB_FRAME_SIZE], *next;
   struct rte_crypto_op **ops;
 
-  next_index = node->cached_next_index;
+  bi = bis;
+  next = nexts;
+  ops = cwm->ops;
 
-  {
-    ops = cwm->ops;
-    n_ops = rte_cryptodev_dequeue_burst (res->dev_id,
-                                         res->qp_id + outbound,
-                                         ops, VLIB_FRAME_SIZE);
-    res->inflights[outbound] -= n_ops;
-    ASSERT (res->inflights >= 0);
-
-    n_deq = n_ops;
-    total_n_deq += n_ops;
-
-    while (n_ops > 0)
-      {
-        u32 n_left_to_next;
-
-        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-        while (n_ops > 0 && n_left_to_next > 0)
-          {
-            u32 bi0, next0;
-            vlib_buffer_t *b0 = 0;
-            struct rte_crypto_op *op;
-
-            op = ops[0];
-            ops += 1;
-            n_ops -= 1;
-            n_left_to_next -= 1;
-
-            dpdk_op_priv_t *priv = crypto_op_get_priv (op);
-            next0 = priv->next;
-
-            if (PREDICT_FALSE (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
-              {
-                next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
-                vlib_node_increment_counter (vm,
-                                             dpdk_crypto_input_node.index,
-                                             DPDK_CRYPTO_INPUT_ERROR_STATUS,
-                                             1);
-              }
-
-            /* XXX store bi0 and next0 in op private? */
-
-            b0 = vlib_buffer_from_rte_mbuf (op->sym[0].m_src);
-            bi0 = vlib_get_buffer_index (vm, b0);
-
-            to_next[0] = bi0;
-            to_next += 1;
-
-            if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-              {
-                vlib_trace_next_frame (vm, node, next0);
-                dpdk_crypto_input_trace_t *tr =
-                  vlib_add_trace (vm, node, b0, sizeof (*tr));
-                tr->status = op->status;
-              }
-
-            op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-
-            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                             n_left_to_next, bi0, next0);
-          }
-        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-      }
-
-    crypto_free_ops (numa, cwm->ops, n_deq);
-  }
-
-  vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
-                               DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, total_n_deq);
-  return total_n_deq;
+  n_ops = n_deq = rte_cryptodev_dequeue_burst (res->dev_id,
+                                               res->qp_id + outbound,
+                                               ops, VLIB_FRAME_SIZE);
+
+  res->inflights[outbound] -= n_ops;
+
+  dpdk_crypto_input_trace (vm, node, ops, n_deq);
+
+  while (n_ops >= 4)
+    {
+      struct rte_crypto_op *op0, *op1, *op2, *op3;
+      vlib_buffer_t *b0, *b1, *b2, *b3;
+
+      /* Prefetch next iteration. */
+      if (n_ops >= 8)
+        {
+          CLIB_PREFETCH (ops[4], CLIB_CACHE_LINE_BYTES, LOAD);
+          CLIB_PREFETCH (ops[5], CLIB_CACHE_LINE_BYTES, LOAD);
+          CLIB_PREFETCH (ops[6], CLIB_CACHE_LINE_BYTES, LOAD);
+          CLIB_PREFETCH (ops[7], CLIB_CACHE_LINE_BYTES, LOAD);
+
+          CLIB_PREFETCH (crypto_op_get_priv (ops[4]), CLIB_CACHE_LINE_BYTES,
+                         LOAD);
+          CLIB_PREFETCH (crypto_op_get_priv (ops[5]), CLIB_CACHE_LINE_BYTES,
+                         LOAD);
+          CLIB_PREFETCH (crypto_op_get_priv (ops[6]), CLIB_CACHE_LINE_BYTES,
+                         LOAD);
+          CLIB_PREFETCH (crypto_op_get_priv (ops[7]), CLIB_CACHE_LINE_BYTES,
+                         LOAD);
+        }
+
+      op0 = ops[0];
+      op1 = ops[1];
+      op2 = ops[2];
+      op3 = ops[3];
+
+      next[0] = crypto_op_get_priv (op0)->next;
+      next[1] = crypto_op_get_priv (op1)->next;
+      next[2] = crypto_op_get_priv (op2)->next;
+      next[3] = crypto_op_get_priv (op3)->next;
+
+      /* check each op against its own status (op0..op3, not op0 four times),
+         otherwise failed verification on op1..op3 goes undetected */
+      dpdk_crypto_check_check_op (vm, node, op0, next + 0);
+      dpdk_crypto_check_check_op (vm, node, op1, next + 1);
+      dpdk_crypto_check_check_op (vm, node, op2, next + 2);
+      dpdk_crypto_check_check_op (vm, node, op3, next + 3);
+
+      b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src);
+      b1 = vlib_buffer_from_rte_mbuf (op1->sym[0].m_src);
+      b2 = vlib_buffer_from_rte_mbuf (op2->sym[0].m_src);
+      b3 = vlib_buffer_from_rte_mbuf (op3->sym[0].m_src);
+
+      bi[0] = vlib_get_buffer_index (vm, b0);
+      bi[1] = vlib_get_buffer_index (vm, b1);
+      bi[2] = vlib_get_buffer_index (vm, b2);
+      bi[3] = vlib_get_buffer_index (vm, b3);
+
+      op0->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+      op1->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+      op2->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+      op3->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+      /* next */
+      next += 4;
+      n_ops -= 4;
+      ops += 4;
+      bi += 4;
+    }
+  while (n_ops > 0)
+    {
+      struct rte_crypto_op *op0;
+      vlib_buffer_t *b0;
+
+      op0 = ops[0];
+
+      next[0] = crypto_op_get_priv (op0)->next;
+
+      dpdk_crypto_check_check_op (vm, node, op0, next + 0);
+
+      /* XXX store bi0 and next0 in op0 private? */
+      b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src);
+      bi[0] = vlib_get_buffer_index (vm, b0);
+
+      op0->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+      /* next */
+      next += 1;
+      n_ops -= 1;
+      ops += 1;
+      bi += 1;
+    }
+
+  vlib_node_increment_counter (vm, node->node_index,
+                               DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq);
+
+  vlib_buffer_enqueue_to_next (vm, node, bis, nexts, n_deq);
+
+  crypto_free_ops (numa, cwm->ops, n_deq);
+
+  return n_deq;
 }
 
 static_always_inline uword
@@ -197,7 +286,7 @@ dpdk_crypto_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (res->inflights[1])
         n_deq += dpdk_crypto_dequeue (vm, node, res, 1);
 
-      if (unlikely(res->remove && !(res->inflights[0] || res->inflights[1])))
+      if (PREDICT_FALSE (res->remove && !(res->inflights[0] || res->inflights[1])))
         vec_add1 (remove, res_idx[0]);
     }
   /* *INDENT-ON* */
-- 
2.16.6