/* app/test-crypto-perf/cperf_test_latency.c (deb_dpdk.git, upstream 17.11-rc3) */
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_latency.h"
#include "cperf_ops.h"
#include "cperf_test_common.h"

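/*
 * Per-operation latency record: the TSC timestamp taken just before the
 * enqueue burst, the TSC timestamp taken when the op is dequeued, and the
 * status reported by the crypto device.
 */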
struct cperf_op_result {
	uint64_t tsc_start;
	uint64_t tsc_end;
	enum rte_crypto_op_status status;
};

struct cperf_latency_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	uint32_t src_buf_offset;
	uint32_t dst_buf_offset;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
	struct cperf_op_result *res;
};

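/*
 * Private data carried in each crypto operation, stored directly after the
 * rte_crypto_sym_op: a pointer to that op's slot in the ctx->res array.
 */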
struct priv_op_data {
	struct cperf_op_result *result;
};

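/* Local min/max helpers; the selected value is widened to uint64_t. */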
#define max(a, b) (((a) > (b)) ? (uint64_t)(a) : (uint64_t)(b))
#define min(a, b) (((a) < (b)) ? (uint64_t)(a) : (uint64_t)(b))

static void
cperf_latency_test_free(struct cperf_latency_ctx *ctx)
{
	if (ctx) {
		if (ctx->sess) {
			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
			rte_cryptodev_sym_session_free(ctx->sess);
		}

		if (ctx->pool)
			rte_mempool_free(ctx->pool);

		rte_free(ctx->res);
		rte_free(ctx);
	}
}

void *
cperf_latency_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_latency_ctx *ctx = NULL;
	size_t extra_op_priv_size = sizeof(struct priv_op_data);

	ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/*
	 * The IV is placed at the end of the crypto operation, after the
	 * rte_crypto_sym_op and the per-op private data (a single
	 * struct cperf_op_result pointer).
	 */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
			iv_offset);
	if (ctx->sess == NULL)
		goto err;

	if (cperf_alloc_common_memory(options, test_vector, dev_id, qp_id,
			extra_op_priv_size,
			&ctx->src_buf_offset, &ctx->dst_buf_offset,
			&ctx->pool) < 0)
		goto err;

	ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
			ctx->options->total_ops, 0);

	if (ctx->res == NULL)
		goto err;

	return ctx;
err:
	cperf_latency_test_free(ctx);

	return NULL;
}

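/*
 * Record the dequeue timestamp and completion status of a processed op in
 * its result slot, reached through the private data stored after the sym op.
 */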
static inline void
store_timestamp(struct rte_crypto_op *op, uint64_t timestamp)
{
	struct priv_op_data *priv_data;

	priv_data = (struct priv_op_data *) (op->sym + 1);
	priv_data->result->status = op->status;
	priv_data->result->tsc_end = timestamp;
}

int
cperf_latency_test_runner(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

	static int only_once;

	if (ctx == NULL)
		return 0;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
	uint64_t i;
	struct priv_op_data *priv_data;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segment_sz < ctx->options->max_buffer_size) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	/* Get first size from range or list */
	if (ctx->options->inc_burst_size != 0)
		test_burst_size = ctx->options->min_burst_size;
	else
		test_burst_size = ctx->options->burst_size_list[0];

	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op) +
		sizeof(struct cperf_op_result *);

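	/*
	 * Main measurement loop, run once per burst size: allocate and
	 * populate a burst of ops, timestamp just before the enqueue burst
	 * and just after the paired dequeue burst, and attach a result slot
	 * to every enqueued op so per-operation latency can be derived once
	 * all ops have been dequeued.
	 */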
	while (test_burst_size <= ctx->options->max_burst_size) {
		uint64_t ops_enqd = 0, ops_deqd = 0;
		uint64_t b_idx = 0;

		uint64_t tsc_val, tsc_end, tsc_start;
		uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
		uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
		uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;

		while (enqd_tot < ctx->options->total_ops) {

			uint16_t burst_size = ((enqd_tot + test_burst_size)
					<= ctx->options->total_ops) ?
							test_burst_size :
							ctx->options->total_ops -
							enqd_tot;

			/* Allocate objects containing crypto operations and mbufs */
			if (rte_mempool_get_bulk(ctx->pool, (void **)ops,
						burst_size) != 0) {
				RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
				return -1;
			}

			/* Setup crypto op, attach mbuf etc */
			(ctx->populate_ops)(ops, ctx->src_buf_offset,
					ctx->dst_buf_offset,
					burst_size, ctx->sess, ctx->options,
					ctx->test_vector, iv_offset);

			tsc_start = rte_rdtsc_precise();

#ifdef CPERF_LINEARIZATION_ENABLE
			if (linearize) {
				/* PMD doesn't support scatter-gather and source buffer
				 * is segmented.
				 * We need to linearize it before enqueuing.
				 */
				for (i = 0; i < burst_size; i++)
					rte_pktmbuf_linearize(ops[i]->sym->m_src);
			}
#endif /* CPERF_LINEARIZATION_ENABLE */

			/* Enqueue burst of ops on crypto device */
			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
					ops, burst_size);

			/* Dequeue processed burst of ops from crypto device */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			/* Return any operations that were not enqueued to the pool */
			if (ops_enqd != burst_size)
				rte_mempool_put_bulk(ctx->pool,
						(void **)&ops[ops_enqd],
						burst_size - ops_enqd);

			for (i = 0; i < ops_enqd; i++) {
				ctx->res[tsc_idx].tsc_start = tsc_start;
				/*
				 * Private data structure starts after the end of the
				 * rte_crypto_sym_op structure.
				 */
				priv_data = (struct priv_op_data *) (ops[i]->sym + 1);
				priv_data->result = (void *)&ctx->res[tsc_idx];
				tsc_idx++;
			}

			if (likely(ops_deqd)) {
				/*
				 * Store the dequeue timestamp for each processed
				 * op, then return the ops to the pool for reuse.
				 */
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}

			enqd_tot += ops_enqd;
			enqd_max = max(ops_enqd, enqd_max);
			enqd_min = min(ops_enqd, enqd_min);

			b_idx++;
		}

		/* Dequeue any operations still in the crypto device */
		while (deqd_tot < ctx->options->total_ops) {
			/* Sending 0 length burst to flush sw crypto device */
			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

			/* dequeue burst */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			tsc_end = rte_rdtsc_precise();

			if (ops_deqd != 0) {
				for (i = 0; i < ops_deqd; i++)
					store_timestamp(ops_processed[i], tsc_end);

				rte_mempool_put_bulk(ctx->pool,
						(void **)ops_processed, ops_deqd);

				deqd_tot += ops_deqd;
				deqd_max = max(ops_deqd, deqd_max);
				deqd_min = min(ops_deqd, deqd_min);
			}
		}

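		/*
		 * Aggregate the per-operation latencies (in TSC cycles)
		 * recorded for this burst size.
		 */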
		for (i = 0; i < tsc_idx; i++) {
			tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
			tsc_max = max(tsc_val, tsc_max);
			tsc_min = min(tsc_val, tsc_min);
			tsc_tot += tsc_val;
		}

		double time_tot, time_avg, time_max, time_min;

		const uint64_t tunit = 1000000; /* us */
		const uint64_t tsc_hz = rte_get_tsc_hz();

		uint64_t enqd_avg = enqd_tot / b_idx;
		uint64_t deqd_avg = deqd_tot / b_idx;
		uint64_t tsc_avg = tsc_tot / tsc_idx;

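		/* Convert cycle counts to microseconds using the TSC frequency */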
		time_tot = tunit*(double)(tsc_tot) / tsc_hz;
		time_avg = tunit*(double)(tsc_avg) / tsc_hz;
		time_max = tunit*(double)(tsc_max) / tsc_hz;
		time_min = tunit*(double)(tsc_min) / tsc_hz;

		if (ctx->options->csv) {
			if (!only_once)
				printf("\n# lcore, Buffer Size, Burst Size, "
						"Pkt Seq #, cycles, time (us)");

			for (i = 0; i < ctx->options->total_ops; i++) {

				printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f",
					ctx->lcore_id, ctx->options->test_buffer_size,
					test_burst_size, i + 1,
					ctx->res[i].tsc_end - ctx->res[i].tsc_start,
					tunit * (double) (ctx->res[i].tsc_end
							- ctx->res[i].tsc_start)
						/ tsc_hz);

			}
			only_once = 1;
		} else {
			printf("\n# Device %d on lcore %u\n", ctx->dev_id,
				ctx->lcore_id);
			printf("\n# Total operations: %u", ctx->options->total_ops);
			printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
			printf("\n# Burst size: %u", test_burst_size);
			printf("\n#     Number of bursts: %"PRIu64,
					b_idx);

			printf("\n#");
			printf("\n#          \t       Total\t   Average\t   "
					"Maximum\t   Minimum");
			printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, enqd_tot,
					enqd_avg, enqd_max, enqd_min);
			printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, deqd_tot,
					deqd_avg, deqd_max, deqd_min);
			printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
					"%10"PRIu64"\t%10"PRIu64, tsc_tot,
					tsc_avg, tsc_max, tsc_min);
			printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
					time_tot, time_avg, time_max, time_min);
			printf("\n\n");

		}

		/* Get next size from range or list */
		if (ctx->options->inc_burst_size != 0)
			test_burst_size += ctx->options->inc_burst_size;
		else {
			if (++burst_size_idx == ctx->options->burst_size_count)
				break;
			test_burst_size =
				ctx->options->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}

void
cperf_latency_test_destructor(void *arg)
{
	struct cperf_latency_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	cperf_latency_test_free(ctx);
}