/*
 * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev
 *
 * Copyright (c) 2017 Intel and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>

#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/udp/udp.h>
#include <dpdk/buffer.h>
#include <dpdk/ipsec/ipsec.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#define foreach_esp_encrypt_next \
_(DROP, "error-drop") \
_(IP4_LOOKUP, "ip4-lookup") \
_(IP6_LOOKUP, "ip6-lookup") \
_(INTERFACE_OUTPUT, "interface-output")

#define _(v, s) ESP_ENCRYPT_NEXT_##v,
typedef enum
{
  foreach_esp_encrypt_next
#undef _
    ESP_ENCRYPT_N_NEXT,
} esp_encrypt_next_t;
#define foreach_esp_encrypt_error \
 _(RX_PKTS, "ESP pkts received") \
 _(SEQ_CYCLED, "Sequence number cycled") \
 _(ENQ_FAIL, "Enqueue failed to crypto device") \
 _(DISCARD, "Not enough crypto operations, discarding frame") \
 _(SESSION, "Failed to get crypto session") \
 _(NOSUP, "Cipher/Auth not supported")

typedef enum
{
#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
  foreach_esp_encrypt_error
#undef _
    ESP_ENCRYPT_N_ERROR,
} esp_encrypt_error_t;
static char *esp_encrypt_error_strings[] = {
#define _(sym,string) string,
  foreach_esp_encrypt_error
#undef _
};
extern vlib_node_registration_t dpdk_esp4_encrypt_node;
extern vlib_node_registration_t dpdk_esp6_encrypt_node;
typedef struct
{
  ipsec_crypto_alg_t crypto_alg;
  ipsec_integ_alg_t integ_alg;
  u8 packet_data[64];
} esp_encrypt_trace_t;
/* packet trace format function */
static u8 *
format_esp_encrypt_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
  ip4_header_t *ih4 = (ip4_header_t *) t->packet_data;
  u32 indent = format_get_indent (s), offset;

  s = format (s, "cipher %U auth %U\n",
              format_ipsec_crypto_alg, t->crypto_alg,
              format_ipsec_integ_alg, t->integ_alg);

  if ((ih4->ip_version_and_header_length & 0xF0) == 0x60)
    {
      s = format (s, "%U%U", format_white_space, indent,
                  format_ip6_header, ih4);
      offset = sizeof (ip6_header_t);
    }
  else
    {
      s = format (s, "%U%U", format_white_space, indent,
                  format_ip4_header, ih4);
      offset = ip4_header_bytes (ih4);
    }

  s = format (s, "\n%U%U", format_white_space, indent,
              format_esp_header, t->packet_data + offset);

  return s;
}
always_inline uword
dpdk_esp_encrypt_inline (vlib_main_t * vm,
                         vlib_node_runtime_t * node,
                         vlib_frame_t * from_frame, int is_ip6)
{
  u32 n_left_from, *from, *to_next, next_index, thread_index;
  ipsec_main_t *im = &ipsec_main;
  u32 thread_idx = vlib_get_thread_index ();
  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
  crypto_resource_t *res = 0;
  ipsec_sa_t *sa0 = 0;
  crypto_alg_t *cipher_alg = 0, *auth_alg = 0;
  struct rte_cryptodev_sym_session *session = 0;
  u32 ret, last_sa_index = ~0;
  u8 numa = rte_socket_id ();
  u8 is_aead = 0;
  crypto_worker_main_t *cwm =
    vec_elt_at_index (dcm->workers_main, thread_idx);
  struct rte_crypto_op **ops = cwm->ops;

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;
  thread_index = vm->thread_index;

  ret = crypto_alloc_ops (numa, ops, n_left_from);
  if (ret)
    {
      if (is_ip6)
        vlib_node_increment_counter (vm, dpdk_esp6_encrypt_node.index,
                                     ESP_ENCRYPT_ERROR_DISCARD, 1);
      else
        vlib_node_increment_counter (vm, dpdk_esp4_encrypt_node.index,
                                     ESP_ENCRYPT_ERROR_DISCARD, 1);
      /* Discard whole frame */
      vlib_buffer_free (vm, from, n_left_from);
      return n_left_from;
    }

  next_index = ESP_ENCRYPT_NEXT_DROP;
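
  /*
   * next_index is only used for buffers that fail locally (unsupported
   * SA, no crypto session, cycled sequence number): those go straight
   * to error-drop.  Successfully prepared packets are not put to a
   * next frame here; they are enqueued to the cryptodev and re-enter
   * the graph via dpdk-crypto-input once the crypto op completes,
   * following priv->next.
   */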
  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          clib_error_t *error;
          u32 bi0, bi1;
          u32 sa_index0;
          vlib_buffer_t *b0, *b1;
          ip4_and_esp_header_t *ih0, *oh0 = 0;
          ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
          ip4_and_udp_and_esp_header_t *ouh0 = 0;
          esp_header_t *esp0;
          esp_footer_t *f0;
          u8 next_hdr_type;
          u32 iv_size;
          u16 orig_sz;
          u8 trunc_size;
          u16 rewrite_len;
          u16 res_idx;
          u16 udp_encap_adv = 0;
          struct rte_mbuf *mb0;
          struct rte_crypto_op *op;

          bi0 = from[0];
          from += 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ih0 = vlib_buffer_get_current (b0);
          mb0 = rte_mbuf_from_vlib_buffer (b0);

          /* ih0/ih6_0 */
          CLIB_PREFETCH (ih0, sizeof (ih6_0[0]), LOAD);
          /* f0 */
          CLIB_PREFETCH (vlib_buffer_get_tail (b0), 20, STORE);
          /* mb0 */
          CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);

          if (n_left_from > 1)
            {
              bi1 = from[1];
              b1 = vlib_get_buffer (vm, bi1);

              CLIB_PREFETCH (b1, CLIB_CACHE_LINE_BYTES, LOAD);
              CLIB_PREFETCH (b1->data - CLIB_CACHE_LINE_BYTES,
                             CLIB_CACHE_LINE_BYTES, STORE);
            }

          op = ops[0];
          ops += 1;
          ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);

          dpdk_op_priv_t *priv = crypto_op_get_priv (op);
          /* store bi in op private */
          priv->bi = bi0;
          priv->encrypt = 1;

          u16 op_len =
            sizeof (op[0]) + sizeof (op[0].sym[0]) + sizeof (priv[0]);
          CLIB_PREFETCH (op, op_len, STORE);

          sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
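
          /*
           * Algorithm and cryptodev session state is refreshed only
           * when the SA changes between packets, so a frame dominated
           * by a single SA pays for the lookups below once.
           */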
          if (sa_index0 != last_sa_index)
            {
              sa0 = pool_elt_at_index (im->sad, sa_index0);

              cipher_alg =
                vec_elt_at_index (dcm->cipher_algs, sa0->crypto_alg);
              auth_alg = vec_elt_at_index (dcm->auth_algs, sa0->integ_alg);

              is_aead = (cipher_alg->type == RTE_CRYPTO_SYM_XFORM_AEAD);

              if (is_aead)
                auth_alg = cipher_alg;

              res_idx = get_resource (cwm, sa0);

              if (PREDICT_FALSE (res_idx == (u16) ~ 0))
                {
                  clib_warning ("unsupported SA by thread index %u",
                                thread_idx);
                  if (is_ip6)
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp6_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_NOSUP, 1);
                  else
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp4_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_NOSUP, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }
              res = vec_elt_at_index (dcm->resource, res_idx);

              error = crypto_get_session (&session, sa_index0, res, cwm, 1);
              if (PREDICT_FALSE (error || !session))
                {
                  clib_warning ("failed to get crypto session");
                  if (is_ip6)
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp6_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_SESSION,
                                                 1);
                  else
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp4_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_SESSION,
                                                 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }

              last_sa_index = sa_index0;
            }
          if (PREDICT_FALSE (esp_seq_advance (sa0)))
            {
              clib_warning ("sequence number counter has cycled SPI %u",
                            sa0->spi);
              if (is_ip6)
                vlib_node_increment_counter (vm,
                                             dpdk_esp6_encrypt_node.index,
                                             ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
              else
                vlib_node_increment_counter (vm,
                                             dpdk_esp4_encrypt_node.index,
                                             ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
              to_next[0] = bi0;
              to_next += 1;
              n_left_to_next -= 1;
              goto trace;
            }

          orig_sz = b0->current_length;

          /* TODO multi-seg support - total_length_not_including_first_buffer */
          vlib_increment_combined_counter
            (&ipsec_sa_counters, thread_index, sa_index0,
             1, b0->current_length);

          res->ops[res->n_ops] = op;
          res->bi[res->n_ops] = bi0;
          res->n_ops += 1;

          dpdk_gcm_cnt_blk *icb = &priv->cb;
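
          /*
           * The initial counter block (ICB) packs the SA salt together
           * with the 64-bit sequence number (seq, seq_hi).  For
           * AES-GCM/CTR it serves as the per-packet nonce, so it must
           * never repeat for a given key.
           */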
          crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi);

          iv_size = cipher_alg->iv_len;
          trunc_size = auth_alg->trunc_size;

          /* if UDP encapsulation is used adjust the address of the IP header */
          if (ipsec_sa_is_set_UDP_ENCAP (sa0) && !is_ip6)
            udp_encap_adv = sizeof (udp_header_t);
          if (ipsec_sa_is_set_IS_TUNNEL (sa0))
            {
              rewrite_len = 0;
              if (!is_ip6 && !ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)) /* ip4inip4 */
                {
                  /* in tunnel mode send it back to FIB */
                  priv->next = DPDK_CRYPTO_INPUT_NEXT_IP4_LOOKUP;
                  u8 adv = sizeof (ip4_header_t) + udp_encap_adv +
                    sizeof (esp_header_t) + iv_size;
                  vlib_buffer_advance (b0, -adv);
                  oh0 = vlib_buffer_get_current (b0);
                  ouh0 = vlib_buffer_get_current (b0);
                  next_hdr_type = IP_PROTOCOL_IP_IN_IP;
                  /*
                   * oh0->ip4.ip_version_and_header_length = 0x45;
                   * oh0->ip4.tos = ih0->ip4.tos;
                   * oh0->ip4.fragment_id = 0;
                   * oh0->ip4.flags_and_fragment_offset = 0;
                   */
                  oh0->ip4.checksum_data_64[0] =
                    clib_host_to_net_u64 (0x45ULL << 56);
                  /*
                   * oh0->ip4.ttl = 254;
                   * oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
                   */
                  oh0->ip4.checksum_data_32[2] =
                    clib_host_to_net_u32 (0xfe320000);

                  oh0->ip4.src_address.as_u32 =
                    sa0->tunnel_src_addr.ip4.as_u32;
                  oh0->ip4.dst_address.as_u32 =
                    sa0->tunnel_dst_addr.ip4.as_u32;

                  if (ipsec_sa_is_set_UDP_ENCAP (sa0))
                    {
                      oh0->ip4.protocol = IP_PROTOCOL_UDP;
                      esp0 = &ouh0->esp;
                    }
                  else
                    esp0 = &oh0->esp;

                  esp0->spi = clib_host_to_net_u32 (sa0->spi);
                  esp0->seq = clib_host_to_net_u32 (sa0->seq);
                }
              else if (is_ip6 && ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)) /* ip6inip6 */
                {
                  /* in tunnel mode send it back to FIB */
                  priv->next = DPDK_CRYPTO_INPUT_NEXT_IP6_LOOKUP;

                  u8 adv =
                    sizeof (ip6_header_t) + sizeof (esp_header_t) + iv_size;
                  vlib_buffer_advance (b0, -adv);
                  ih6_0 = (ip6_and_esp_header_t *) ih0;
                  oh6_0 = vlib_buffer_get_current (b0);

                  next_hdr_type = IP_PROTOCOL_IPV6;

                  oh6_0->ip6.ip_version_traffic_class_and_flow_label =
                    ih6_0->ip6.ip_version_traffic_class_and_flow_label;

                  oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
                  oh6_0->ip6.hop_limit = 254;
                  oh6_0->ip6.src_address.as_u64[0] =
                    sa0->tunnel_src_addr.ip6.as_u64[0];
                  oh6_0->ip6.src_address.as_u64[1] =
                    sa0->tunnel_src_addr.ip6.as_u64[1];
                  oh6_0->ip6.dst_address.as_u64[0] =
                    sa0->tunnel_dst_addr.ip6.as_u64[0];
                  oh6_0->ip6.dst_address.as_u64[1] =
                    sa0->tunnel_dst_addr.ip6.as_u64[1];

                  esp0 = &oh6_0->esp;
                  oh6_0->esp.spi = clib_host_to_net_u32 (sa0->spi);
                  oh6_0->esp.seq = clib_host_to_net_u32 (sa0->seq);
                }
              else              /* unsupported ip4inip6, ip6inip4 */
                {
                  if (is_ip6)
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp6_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_NOSUP, 1);
                  else
                    vlib_node_increment_counter (vm,
                                                 dpdk_esp4_encrypt_node.index,
                                                 ESP_ENCRYPT_ERROR_NOSUP, 1);
                  to_next[0] = bi0;
                  to_next += 1;
                  n_left_to_next -= 1;
                  goto trace;
                }

              vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
            }
          else                  /* transport mode */
            {
              priv->next = DPDK_CRYPTO_INPUT_NEXT_INTERFACE_OUTPUT;
              rewrite_len = vnet_buffer (b0)->ip.save_rewrite_length;
              u16 adv = sizeof (esp_header_t) + iv_size + udp_encap_adv;
              vlib_buffer_advance (b0, -adv - rewrite_len);
              u8 *src = ((u8 *) ih0) - rewrite_len;
              u8 *dst = vlib_buffer_get_current (b0);
              oh0 = vlib_buffer_get_current (b0) + rewrite_len;
              ouh0 = (ip4_and_udp_and_esp_header_t *) oh0;

              if (is_ip6)
                {
                  orig_sz -= sizeof (ip6_header_t);
                  ih6_0 = (ip6_and_esp_header_t *) ih0;
                  next_hdr_type = ih6_0->ip6.protocol;
                  memmove (dst, src, rewrite_len + sizeof (ip6_header_t));
                  oh6_0 = (ip6_and_esp_header_t *) oh0;
                  oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
                  esp0 = &oh6_0->esp;
                }
              else              /* ipv4 */
                {
                  u16 ip_size = ip4_header_bytes (&ih0->ip4);
                  orig_sz -= ip_size;
                  next_hdr_type = ih0->ip4.protocol;
                  memmove (dst, src, rewrite_len + ip_size);
                  if (ipsec_sa_is_set_UDP_ENCAP (sa0))
                    {
                      oh0->ip4.protocol = IP_PROTOCOL_UDP;
                      esp0 = (esp_header_t *)
                        (((u8 *) oh0) + ip_size + udp_encap_adv);
                    }
                  else
                    {
                      oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
                      esp0 = (esp_header_t *) (((u8 *) oh0) + ip_size);
                    }
                }

              esp0->spi = clib_host_to_net_u32 (sa0->spi);
              esp0->seq = clib_host_to_net_u32 (sa0->seq);
            }

          if (ipsec_sa_is_set_UDP_ENCAP (sa0) && ouh0)
            {
              ouh0->udp.src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
              ouh0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
              ouh0->udp.checksum = 0;
            }
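
          /*
           * A zero UDP checksum is permitted for UDP-encapsulated ESP
           * (RFC 3948); both ports carry the IPsec NAT-T port
           * (UDP_DST_PORT_ipsec).
           */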
          ASSERT (is_pow2 (cipher_alg->boundary));
          u16 mask = cipher_alg->boundary - 1;
          u16 pad_payload_len = ((orig_sz + 2) + mask) & ~mask;
          u8 pad_bytes = pad_payload_len - 2 - orig_sz;
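
          /*
           * Example: AES-CBC (boundary 16) with a 53-byte payload:
           * pad_payload_len = (53 + 2 + 15) & ~15 = 64, so
           * pad_bytes = 64 - 2 - 53 = 9.  The "+ 2" accounts for the
           * ESP footer (pad_length + next_header).
           */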
          u8 *padding =
            vlib_buffer_put_uninit (b0, pad_bytes + 2 + trunc_size);

          /* The extra pad bytes would be overwritten by the digest */
          if (pad_bytes)
            clib_memcpy_fast (padding, pad_data, 16);

          f0 = (esp_footer_t *) (padding + pad_bytes);
          f0->pad_length = pad_bytes;
          f0->next_header = next_hdr_type;

          if (is_ip6)
            {
              u16 len = b0->current_length - sizeof (ip6_header_t);
              oh6_0->ip6.payload_length =
                clib_host_to_net_u16 (len - rewrite_len);
            }
          else
            {
              oh0->ip4.length =
                clib_host_to_net_u16 (b0->current_length - rewrite_len);
              oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
              if (ipsec_sa_is_set_UDP_ENCAP (sa0) && ouh0)
                {
                  ouh0->udp.length =
                    clib_host_to_net_u16 (clib_net_to_host_u16
                                          (ouh0->ip4.length) -
                                          ip4_header_bytes (&ouh0->ip4));
                }
            }

          b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;

          /* mbuf packet starts at ESP header */
          mb0->data_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
          mb0->pkt_len = vlib_buffer_get_tail (b0) - ((u8 *) esp0);
          mb0->data_off = ((void *) esp0) - mb0->buf_addr;
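
          /*
           * The cipher/auth offsets passed to the cryptodev below are
           * relative to this data_off, i.e. to the start of the ESP
           * header, not to the start of the packet.
           */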
          u32 cipher_off, cipher_len, auth_len = 0;
          u32 *aad = NULL;

          u8 *digest = vlib_buffer_get_tail (b0) - trunc_size;
          u64 digest_paddr =
            mb0->buf_physaddr + digest - ((u8 *) mb0->buf_addr);

          if (!is_aead && cipher_alg->alg == RTE_CRYPTO_CIPHER_AES_CBC)
            {
              cipher_off = sizeof (esp_header_t);
              cipher_len = iv_size + pad_payload_len;
            }
          else                  /* CTR/GCM */
            {
              u32 *esp_iv = (u32 *) (esp0 + 1);
              esp_iv[0] = sa0->seq;
              esp_iv[1] = sa0->seq_hi;

              cipher_off = sizeof (esp_header_t) + iv_size;
              cipher_len = pad_payload_len;
            }
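
          /*
           * For CBC the ESP IV field is included in the ciphered
           * region, so encryption starts right after the ESP header.
           * For CTR/GCM the counter block is built from (salt, seq,
           * seq_hi) in the ICB above; the ESP IV field on the wire
           * carries the sequence numbers in the clear and the cipher
           * region starts after it.
           */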
          if (is_aead)
            {
              aad = (u32 *) priv->aad;
              aad[0] = clib_host_to_net_u32 (sa0->spi);
              aad[1] = clib_host_to_net_u32 (sa0->seq);

              /* aad[3] should always be 0 */
              if (PREDICT_FALSE (ipsec_sa_is_set_USE_ESN (sa0)))
                aad[2] = clib_host_to_net_u32 (sa0->seq_hi);
              else
                aad[2] = 0;
            }
          else
            {
              auth_len =
                vlib_buffer_get_tail (b0) - ((u8 *) esp0) - trunc_size;
              if (ipsec_sa_is_set_USE_ESN (sa0))
                {
                  u32 *_digest = (u32 *) digest;
                  _digest[0] = clib_host_to_net_u32 (sa0->seq_hi);
                  auth_len += 4;
                }
            }

          crypto_op_setup (is_aead, mb0, op, session, cipher_off, cipher_len,
                           0, auth_len, (u8 *) aad, digest, digest_paddr);
        trace:
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              esp_encrypt_trace_t *tr =
                vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->crypto_alg = sa0->crypto_alg;
              tr->integ_alg = sa0->integ_alg;
              u8 *p = vlib_buffer_get_current (b0);
              if (!ipsec_sa_is_set_IS_TUNNEL (sa0))
                p += vnet_buffer (b0)->ip.save_rewrite_length;
              clib_memcpy_fast (tr->packet_data, p, sizeof (tr->packet_data));
            }
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  if (is_ip6)
    {
      vlib_node_increment_counter (vm, dpdk_esp6_encrypt_node.index,
                                   ESP_ENCRYPT_ERROR_RX_PKTS,
                                   from_frame->n_vectors);

      crypto_enqueue_ops (vm, cwm, dpdk_esp6_encrypt_node.index,
                          ESP_ENCRYPT_ERROR_ENQ_FAIL, numa);
    }
  else
    {
      vlib_node_increment_counter (vm, dpdk_esp4_encrypt_node.index,
                                   ESP_ENCRYPT_ERROR_RX_PKTS,
                                   from_frame->n_vectors);

      crypto_enqueue_ops (vm, cwm, dpdk_esp4_encrypt_node.index,
                          ESP_ENCRYPT_ERROR_ENQ_FAIL, numa);
    }
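
  /*
   * ops was advanced once per consumed packet, so the count below
   * covers any crypto ops allocated for this frame but never consumed
   * by the loop above; return those to the per-NUMA pool.
   */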
  crypto_free_ops (numa, ops, cwm->ops + from_frame->n_vectors - ops);

  return from_frame->n_vectors;
}
VLIB_NODE_FN (dpdk_esp4_encrypt_node) (vlib_main_t * vm,
                                       vlib_node_runtime_t * node,
                                       vlib_frame_t * from_frame)
{
  return dpdk_esp_encrypt_inline (vm, node, from_frame, 0 /*is_ip6 */ );
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_esp4_encrypt_node) = {
  .name = "dpdk-esp4-encrypt",
  .flags = VLIB_NODE_FLAG_IS_OUTPUT,
  .vector_size = sizeof (u32),
  .format_trace = format_esp_encrypt_trace,
  .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
  .error_strings = esp_encrypt_error_strings,
  .n_next_nodes = 1,
  .next_nodes =
    {
      [ESP_ENCRYPT_NEXT_DROP] = "error-drop",
    }
};
/* *INDENT-ON* */
VLIB_NODE_FN (dpdk_esp6_encrypt_node) (vlib_main_t * vm,
                                       vlib_node_runtime_t * node,
                                       vlib_frame_t * from_frame)
{
  return dpdk_esp_encrypt_inline (vm, node, from_frame, 1 /*is_ip6 */ );
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_esp6_encrypt_node) = {
  .name = "dpdk-esp6-encrypt",
  .flags = VLIB_NODE_FLAG_IS_OUTPUT,
  .vector_size = sizeof (u32),
  .format_trace = format_esp_encrypt_trace,
  .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
  .error_strings = esp_encrypt_error_strings,
  .n_next_nodes = 1,
  .next_nodes =
    {
      [ESP_ENCRYPT_NEXT_DROP] = "error-drop",
    }
};
/* *INDENT-ON* */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */