/*
 * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev
 *
 * Copyright (c) 2017 Intel and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>

#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <dpdk/ipsec/ipsec.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>

#define foreach_esp_encrypt_next                   \
_(DROP, "error-drop")                              \
_(IP4_LOOKUP, "ip4-lookup")                        \
_(IP6_LOOKUP, "ip6-lookup")                        \
_(INTERFACE_OUTPUT, "interface-output")

#define _(v, s) ESP_ENCRYPT_NEXT_##v,
typedef enum
{
  foreach_esp_encrypt_next
#undef _
    ESP_ENCRYPT_N_NEXT,
} esp_encrypt_next_t;

#define foreach_esp_encrypt_error                   \
 _(RX_PKTS, "ESP pkts received")                    \
 _(SEQ_CYCLED, "Sequence number cycled")            \
 _(ENQ_FAIL, "Enqueue failed to crypto device")     \
 _(DISCARD, "Not enough crypto operations, discarding frame")  \
 _(SESSION, "Failed to get crypto session")         \
 _(NOSUP, "Cipher/Auth not supported")


typedef enum
{
#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
  foreach_esp_encrypt_error
#undef _
    ESP_ENCRYPT_N_ERROR,
} esp_encrypt_error_t;

static char *esp_encrypt_error_strings[] = {
#define _(sym,string) string,
  foreach_esp_encrypt_error
#undef _
};

vlib_node_registration_t dpdk_esp_encrypt_node;

typedef struct
{
  ipsec_crypto_alg_t crypto_alg;
  ipsec_integ_alg_t integ_alg;
  u8 packet_data[64];
} esp_encrypt_trace_t;

/* packet trace format function */
static u8 *
format_esp_encrypt_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
  ip4_header_t *ih4 = (ip4_header_t *) t->packet_data;
  uword indent = format_get_indent (s), offset;

  s = format (s, "cipher %U auth %U\n",
              format_ipsec_crypto_alg, t->crypto_alg,
              format_ipsec_integ_alg, t->integ_alg);

  if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
    {
      s = format (s, "%U%U", format_white_space, indent,
                  format_ip6_header, ih4);
      offset = sizeof (ip6_header_t);
    }
  else
    {
      s = format (s, "%U%U", format_white_space, indent,
                  format_ip4_header, ih4);
      offset = ip4_header_bytes (ih4);
    }

  s = format (s, "\n%U%U", format_white_space, indent,
              format_esp_header, t->packet_data + offset);

  return s;
}

static uword
dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
                          vlib_node_runtime_t * node,
                          vlib_frame_t * from_frame)
{
  u32 n_left_from, *from, *to_next, next_index;
  ipsec_main_t *im = &ipsec_main;
  u32 thread_idx = vlib_get_thread_index ();
  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
  crypto_resource_t *res = 0;
  ipsec_sa_t *sa0 = 0;
  crypto_alg_t *cipher_alg = 0, *auth_alg = 0;
  struct rte_cryptodev_sym_session *session = 0;
  u32 ret, last_sa_index = ~0;
  u8 numa = rte_socket_id ();
  u8 is_aead = 0;
  crypto_worker_main_t *cwm =
    vec_elt_at_index (dcm->workers_main, thread_idx);
  struct rte_crypto_op **ops = cwm->ops;

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  ret = crypto_alloc_ops (numa, ops, n_left_from);
  if (ret)
    {
      vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
                                   ESP_ENCRYPT_ERROR_DISCARD, 1);
      /* Discard whole frame */
      return n_left_from;
    }

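  /*
   * Successfully prepared packets are enqueued to the cryptodev and re-enter
   * the graph later via dpdk-crypto-input, which dispatches them according to
   * priv->next; the only next index used directly by this node is error-drop,
   * for packets that fail before being enqueued.
   */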
  next_index = ESP_ENCRYPT_NEXT_DROP;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          clib_error_t *error;
          u32 bi0;
          vlib_buffer_t *b0 = 0;
          u32 sa_index0;
          ip4_and_esp_header_t *ih0, *oh0 = 0;
          ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
          esp_header_t *esp0;
          esp_footer_t *f0;
          u8 is_ipv6, next_hdr_type;
          u32 iv_size;
          u16 orig_sz;
          u8 trunc_size;
          struct rte_mbuf *mb0 = 0;
          struct rte_crypto_op *op;
          u16 res_idx;

          bi0 = from[0];
          from += 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ih0 = vlib_buffer_get_current (b0);
          mb0 = rte_mbuf_from_vlib_buffer (b0);

          /* ih0/ih6_0 */
          CLIB_PREFETCH (ih0, sizeof (ih6_0[0]), LOAD);
          /* f0 */
          CLIB_PREFETCH (vlib_buffer_get_tail (b0), 20, STORE);
          /* mb0 */
          CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);

          op = ops[0];
          ops += 1;
          ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);

          dpdk_op_priv_t *priv = crypto_op_get_priv (op);

          u16 op_len =
            sizeof (op[0]) + sizeof (op[0].sym[0]) + sizeof (priv[0]);
          CLIB_PREFETCH (op, op_len, STORE);

          sa_index0 = vnet_buffer (b0)->ipsec.sad_index;

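          /* Look up the SA, its algorithms, the crypto resource and the
             session only when the SA differs from the previous packet's;
             they are cached across consecutive packets of the same SA. */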
          if (sa_index0 != last_sa_index)
            {
              last_sa_index = sa_index0;

              sa0 = pool_elt_at_index (im->sad, sa_index0);

              cipher_alg =
                vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
              auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);

#if DPDK_NO_AEAD
              is_aead = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) ||
                         (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) ||
                         (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_256));
#else
              is_aead = (cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD);
#endif

              if (is_aead)
                auth_alg = cipher_alg;

              res_idx = get_resource (cwm, sa0);

              if (PREDICT_FALSE (res_idx == (u16) ~ 0))
                {
                  clib_warning ("unsupported SA by thread index %u",
                                thread_idx);
                  vlib_node_increment_counter (vm,
                                               dpdk_esp_encrypt_node.index,
                                               ESP_ENCRYPT_ERROR_NOSUP, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
              res = vec_elt_at_index (dcm->resource, res_idx);

              error = crypto_get_session (&session, sa_index0, res, cwm, 1);
              if (PREDICT_FALSE (error || !session))
                {
                  clib_warning ("failed to get crypto session");
                  vlib_node_increment_counter (vm,
                                               dpdk_esp_encrypt_node.index,
                                               ESP_ENCRYPT_ERROR_SESSION, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
            }

          if (PREDICT_FALSE (esp_seq_advance (sa0)))
            {
              clib_warning ("sequence number counter has cycled SPI %u",
                            sa0->spi);
              vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
                                           ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
              //TODO: rekey SA
              to_next[0] = bi0;
              to_next += 1;
              n_left_to_next -= 1;
              goto trace;
            }

          orig_sz = b0->current_length;

          /* TODO multi-seg support - total_length_not_including_first_buffer */
          sa0->total_data_size += b0->current_length;

          res->ops[res->n_ops] = op;
          res->bi[res->n_ops] = bi0;
          res->n_ops += 1;

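          /* Build the per-op IV / initial counter block from the SA salt and
             the 64-bit extended sequence number; crypto_op_setup () hands it
             to the cryptodev as the IV. */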
          dpdk_gcm_cnt_blk *icb = &priv->cb;

          crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi);

          is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60;

          iv_size = cipher_alg->iv_len;
          trunc_size = auth_alg->trunc_size;

          if (sa0->is_tunnel)
            {
              if (!is_ipv6 && !sa0->is_tunnel_ip6)      /* ip4inip4 */
                {
                  /* in tunnel mode send it back to FIB */
                  priv->next = DPDK_CRYPTO_INPUT_NEXT_IP4_LOOKUP;
                  u8 adv =
                    sizeof (ip4_header_t) + sizeof (esp_header_t) + iv_size;
                  vlib_buffer_advance (b0, -adv);
                  oh0 = vlib_buffer_get_current (b0);
                  next_hdr_type = IP_PROTOCOL_IP_IN_IP;
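                  /* The two word-sized stores below set the outer IPv4 header
                     fields listed in the adjacent comments (and zero length
                     and checksum, which are filled in later) in two writes. */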
                  /*
                   * oh0->ip4.ip_version_and_header_length = 0x45;
                   * oh0->ip4.tos = ih0->ip4.tos;
                   * oh0->ip4.fragment_id = 0;
                   * oh0->ip4.flags_and_fragment_offset = 0;
                   */
                  oh0->ip4.checksum_data_64[0] =
                    clib_host_to_net_u64 (0x45ULL << 56);
                  /*
                   * oh0->ip4.ttl = 254;
                   * oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
                   */
                  oh0->ip4.checksum_data_32[2] =
                    clib_host_to_net_u32 (0xfe320000);

                  oh0->ip4.src_address.as_u32 =
                    sa0->tunnel_src_addr.ip4.as_u32;
                  oh0->ip4.dst_address.as_u32 =
                    sa0->tunnel_dst_addr.ip4.as_u32;
                  esp0 = &oh0->esp;
                  oh0->esp.spi = clib_host_to_net_u32 (sa0->spi);
                  oh0->esp.seq = clib_host_to_net_u32 (sa0->seq);
                }
              else if (is_ipv6 && sa0->is_tunnel_ip6)   /* ip6inip6 */
                {
                  /* in tunnel mode send it back to FIB */
                  priv->next = DPDK_CRYPTO_INPUT_NEXT_IP6_LOOKUP;

                  u8 adv =
                    sizeof (ip6_header_t) + sizeof (esp_header_t) + iv_size;
                  vlib_buffer_advance (b0, -adv);
                  ih6_0 = (ip6_and_esp_header_t *) ih0;
                  oh6_0 = vlib_buffer_get_current (b0);

                  next_hdr_type = IP_PROTOCOL_IPV6;

                  oh6_0->ip6.ip_version_traffic_class_and_flow_label =
                    ih6_0->ip6.ip_version_traffic_class_and_flow_label;

                  oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
                  oh6_0->ip6.hop_limit = 254;
                  oh6_0->ip6.src_address.as_u64[0] =
                    sa0->tunnel_src_addr.ip6.as_u64[0];
                  oh6_0->ip6.src_address.as_u64[1] =
                    sa0->tunnel_src_addr.ip6.as_u64[1];
                  oh6_0->ip6.dst_address.as_u64[0] =
                    sa0->tunnel_dst_addr.ip6.as_u64[0];
                  oh6_0->ip6.dst_address.as_u64[1] =
                    sa0->tunnel_dst_addr.ip6.as_u64[1];
                  esp0 = &oh6_0->esp;
                  oh6_0->esp.spi = clib_host_to_net_u32 (sa0->spi);
                  oh6_0->esp.seq = clib_host_to_net_u32 (sa0->seq);
                }
              else              /* unsupported ip4inip6, ip6inip4 */
                {
                  vlib_node_increment_counter (vm,
                                               dpdk_esp_encrypt_node.index,
                                               ESP_ENCRYPT_ERROR_NOSUP, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
              vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
            }
          else                  /* transport mode */
            {
              priv->next = DPDK_CRYPTO_INPUT_NEXT_INTERFACE_OUTPUT;
              u16 rewrite_len = vnet_buffer (b0)->ip.save_rewrite_length;
              u16 adv = sizeof (esp_header_t) + iv_size;
              vlib_buffer_advance (b0, -rewrite_len - adv);
              u8 *src = ((u8 *) ih0) - rewrite_len;
              u8 *dst = vlib_buffer_get_current (b0);
              oh0 = (ip4_and_esp_header_t *) (dst + rewrite_len);

              if (is_ipv6)
                {
                  orig_sz -= sizeof (ip6_header_t);
                  ih6_0 = (ip6_and_esp_header_t *) ih0;
                  next_hdr_type = ih6_0->ip6.protocol;
                  memmove (dst, src, rewrite_len + sizeof (ip6_header_t));
                  oh6_0 = (ip6_and_esp_header_t *) oh0;
                  oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
                  esp0 = &oh6_0->esp;
                }
              else              /* ipv4 */
                {
                  orig_sz -= ip4_header_bytes (&ih0->ip4);
                  next_hdr_type = ih0->ip4.protocol;
                  memmove (dst, src,
                           rewrite_len + ip4_header_bytes (&ih0->ip4));
                  oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
                  /* ESP header follows the (variable-length) IPv4 header */
                  esp0 = (esp_header_t *)
                    (((u8 *) oh0) + ip4_header_bytes (&ih0->ip4));
                }
              esp0->spi = clib_host_to_net_u32 (sa0->spi);
              esp0->seq = clib_host_to_net_u32 (sa0->seq);
            }

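          /* Pad so that (payload + 2-byte ESP footer) lands on the cipher
             block boundary (RFC 4303), then grow the buffer tail to hold the
             padding, the footer and the ICV. */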
          ASSERT (is_pow2 (cipher_alg->boundary));
          u16 mask = cipher_alg->boundary - 1;
          u16 pad_payload_len = ((orig_sz + 2) + mask) & ~mask;
          u8 pad_bytes = pad_payload_len - 2 - orig_sz;

          u8 *padding =
            vlib_buffer_put_uninit (b0, pad_bytes + 2 + trunc_size);

          if (pad_bytes)
            clib_memcpy (padding, pad_data, 16);

          f0 = (esp_footer_t *) (padding + pad_bytes);
          f0->pad_length = pad_bytes;
          f0->next_header = next_hdr_type;

          if (is_ipv6)
            {
              u16 len = b0->current_length - sizeof (ip6_header_t);
              oh6_0->ip6.payload_length = clib_host_to_net_u16 (len);
            }
          else
            {
              oh0->ip4.length = clib_host_to_net_u16 (b0->current_length);
              oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
            }

          vnet_buffer (b0)->sw_if_index[VLIB_RX] =
            vnet_buffer (b0)->sw_if_index[VLIB_RX];
          b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;

          /* mbuf packet starts at ESP header */
          mb0->data_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
          mb0->pkt_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
          mb0->data_off = ((void *) esp0) - mb0->buf_addr;

          u32 cipher_off, cipher_len;
          u32 auth_len = 0, aad_size = 0;
          u32 *aad = NULL;
          u8 *digest = vlib_buffer_get_tail (b0) - trunc_size;

          if (cipher_alg->alg == RTE_CRYPTO_CIPHER_AES_CBC)
            {
              cipher_off = sizeof (esp_header_t);
              cipher_len = iv_size + pad_payload_len;
            }
          else                  /* CTR/GCM */
            {
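              /* For CTR/GCM the on-wire ESP IV carries the sequence number,
                 while the cryptodev is given the 12-byte salt + sequence
                 nonce held in the counter block (icb) as its IV. */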
              u32 *esp_iv = (u32 *) (esp0 + 1);
              esp_iv[0] = sa0->seq;
              esp_iv[1] = sa0->seq_hi;

              cipher_off = sizeof (esp_header_t) + iv_size;
              cipher_len = pad_payload_len;

              iv_size = 12;     /* CTR/GCM IV size, not ESP IV size */
            }

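          /* For AEAD, the ESP header (SPI and sequence number, plus the high
             sequence bits when ESN is enabled) is authenticated as AAD rather
             than via a separate auth range. */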
          if (is_aead)
            {
              aad = (u32 *) priv->aad;
              aad[0] = clib_host_to_net_u32 (sa0->spi);
              aad[1] = clib_host_to_net_u32 (sa0->seq);

              if (sa0->use_esn)
                {
                  aad[2] = clib_host_to_net_u32 (sa0->seq_hi);
                  aad_size = 12;
                }
              else
                aad_size = 8;
            }
          else
            {
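              /* Classic cipher + auth: the ICV covers the ESP header through
                 the padded payload; with ESN the high 32 bits of the sequence
                 number are written at the digest location so they are covered
                 by the MAC, and are later overwritten by the ICV itself. */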
              auth_len =
                vlib_buffer_get_tail (b0) - ((u8 *) esp0) - trunc_size;
              if (sa0->use_esn)
                {
                  *((u32 *) digest) = sa0->seq_hi;
                  auth_len += 4;
                }
            }

          crypto_op_setup (is_aead, mb0, op, session,
                           cipher_off, cipher_len, (u8 *) icb, iv_size,
                           0, auth_len, (u8 *) aad, aad_size,
                           digest, 0, trunc_size);

        trace:
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              esp_encrypt_trace_t *tr =
                vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
              u8 *p = vlib_buffer_get_current (b0);
              if (!sa0->is_tunnel)
                p += vnet_buffer (b0)->ip.save_rewrite_length;
              clib_memcpy (tr->packet_data, p, sizeof (tr->packet_data));
            }
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
                               ESP_ENCRYPT_ERROR_RX_PKTS,
                               from_frame->n_vectors);

  crypto_enqueue_ops (vm, cwm, 1, dpdk_esp_encrypt_node.index,
                      ESP_ENCRYPT_ERROR_ENQ_FAIL, numa);

  crypto_free_ops (numa, ops, cwm->ops + from_frame->n_vectors - ops);

  return from_frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = {
  .function = dpdk_esp_encrypt_node_fn,
  .name = "dpdk-esp-encrypt",
  .flags = VLIB_NODE_FLAG_IS_OUTPUT,
  .vector_size = sizeof (u32),
  .format_trace = format_esp_encrypt_trace,
  .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
  .error_strings = esp_encrypt_error_strings,
  .n_next_nodes = 1,
  .next_nodes =
    {
      [ESP_ENCRYPT_NEXT_DROP] = "error-drop",
    }
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn)
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */