From 492d7790ff26c569bee81617c662363652891140 Mon Sep 17 00:00:00 2001 From: Gabriel Oginski Date: Wed, 10 Nov 2021 07:59:56 +0000 Subject: [PATCH] wireguard: add async mode for encryption packets Originally wireguard doesn't support async mode for encryption packets. This patch adds async mode for encryption in wireguard and also adds support for the chacha20-poly1305 algorithm in cryptodev for the async handler. In addition, it contains a new command line to activate async mode for wireguard: set wireguard async mode on|off and also adds a new command to check the active mode for wireguard: show wireguard mode Type: improvement Signed-off-by: Gabriel Oginski Change-Id: I141d48b42ee8dbff0112b8542ab5205268089da6 --- src/plugins/wireguard/wireguard.api | 11 + src/plugins/wireguard/wireguard.c | 28 ++ src/plugins/wireguard/wireguard.h | 58 ++++ src/plugins/wireguard/wireguard_api.c | 12 + src/plugins/wireguard/wireguard_cli.c | 55 ++++ src/plugins/wireguard/wireguard_noise.c | 60 ---- src/plugins/wireguard/wireguard_noise.h | 14 +- src/plugins/wireguard/wireguard_output_tun.c | 452 +++++++++++++++++++++++++-- src/plugins/wireguard/wireguard_timer.c | 12 +- src/plugins/wireguard/wireguard_timer.h | 2 +- src/vnet/crypto/crypto.h | 19 +- 11 files changed, 616 insertions(+), 107 deletions(-) diff --git a/src/plugins/wireguard/wireguard.api b/src/plugins/wireguard/wireguard.api index 1473d9ca39b..9a839deabd9 100644 --- a/src/plugins/wireguard/wireguard.api +++ b/src/plugins/wireguard/wireguard.api @@ -195,6 +195,17 @@ define wireguard_peers_details { vl_api_wireguard_peer_t peer; }; +/** \brief Wireguard Set Async mode + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param async_enable - wireguard async mode on or off +*/ +autoreply define wg_set_async_mode { + u32 client_index; + u32 context; + bool async_enable; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/plugins/wireguard/wireguard.c 
b/src/plugins/wireguard/wireguard.c index 8438cc126a6..40c2c090e9e 100644 --- a/src/plugins/wireguard/wireguard.c +++ b/src/plugins/wireguard/wireguard.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,31 @@ #include wg_main_t wg_main; +wg_async_post_next_t wg_encrypt_async_next; + +void +wg_set_async_mode (u32 is_enabled) +{ + vnet_crypto_request_async_mode (is_enabled); + + if (is_enabled) + wg_op_mode_set_ASYNC (); + else + wg_op_mode_unset_ASYNC (); +} + +static void +wireguard_register_post_node (vlib_main_t *vm) +{ + wg_async_post_next_t *eit; + + eit = &wg_encrypt_async_next; + + eit->wg4_post_next = + vnet_crypto_register_post_node (vm, "wg4-output-tun-post-node"); + eit->wg6_post_next = + vnet_crypto_register_post_node (vm, "wg6-output-tun-post-node"); +} static clib_error_t * wg_init (vlib_main_t * vm) @@ -44,6 +70,8 @@ wg_init (vlib_main_t * vm) CLIB_CACHE_LINE_BYTES); wg_timer_wheel_init (); + wireguard_register_post_node (vm); + wmp->op_mode_flags = 0; return (NULL); } diff --git a/src/plugins/wireguard/wireguard.h b/src/plugins/wireguard/wireguard.h index 4cbee1fcf7a..aaec3d47b23 100644 --- a/src/plugins/wireguard/wireguard.h +++ b/src/plugins/wireguard/wireguard.h @@ -30,6 +30,7 @@ typedef struct wg_per_thread_data_t_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); vnet_crypto_op_t *crypto_ops; + vnet_crypto_async_frame_t **async_frames; u8 data[WG_DEFAULT_DATA_SIZE]; } wg_per_thread_data_t; typedef struct @@ -50,12 +51,69 @@ typedef struct u8 feature_init; tw_timer_wheel_16t_2w_512sl_t timer_wheel; + + /* operation mode flags (e.g. 
async) */ + u8 op_mode_flags; } wg_main_t; +typedef struct +{ + /* wg post node index for async crypto */ + u32 wg4_post_next; + u32 wg6_post_next; +} wg_async_post_next_t; + +extern wg_async_post_next_t wg_encrypt_async_next; extern wg_main_t wg_main; +/** + * Wireguard operation mode + **/ +#define foreach_wg_op_mode_flags _ (0, ASYNC, "async") + +/** + * Helper function to set/unset and check op modes + **/ +typedef enum wg_op_mode_flags_t_ +{ +#define _(v, f, s) WG_OP_MODE_FLAG_##f = 1 << v, + foreach_wg_op_mode_flags +#undef _ +} __clib_packed wg_op_mode_flags_t; + +#define _(a, v, s) \ + always_inline int wg_op_mode_set_##v (void) \ + { \ + return (wg_main.op_mode_flags |= WG_OP_MODE_FLAG_##v); \ + } \ + always_inline int wg_op_mode_unset_##v (void) \ + { \ + return (wg_main.op_mode_flags &= ~WG_OP_MODE_FLAG_##v); \ + } \ + always_inline int wg_op_mode_is_set_##v (void) \ + { \ + return (wg_main.op_mode_flags & WG_OP_MODE_FLAG_##v); \ + } +foreach_wg_op_mode_flags +#undef _ + + typedef struct +{ + u8 __pad[22]; + u16 next_index; +} wg_post_data_t; + +STATIC_ASSERT (sizeof (wg_post_data_t) <= + STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused), + "Custom meta-data too large for vnet_buffer_opaque_t"); + +#define wg_post_data(b) \ + ((wg_post_data_t *) ((u8 *) ((b)->opaque) + \ + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused))) + #define WG_START_EVENT 1 void wg_feature_init (wg_main_t * wmp); +void wg_set_async_mode (u32 is_enabled); #endif /* __included_wg_h__ */ diff --git a/src/plugins/wireguard/wireguard_api.c b/src/plugins/wireguard/wireguard_api.c index 4cb40a15231..67f68d069f1 100644 --- a/src/plugins/wireguard/wireguard_api.c +++ b/src/plugins/wireguard/wireguard_api.c @@ -365,6 +365,18 @@ wg_api_peer_event (index_t peeri, wg_peer_flags flags) }; } +static void +vl_api_wg_set_async_mode_t_handler (vl_api_wg_set_async_mode_t *mp) +{ + wg_main_t *wmp = &wg_main; + vl_api_wg_set_async_mode_reply_t *rmp; + int rv = 0; + + wg_set_async_mode (mp->async_enable); 
+ + REPLY_MACRO (VL_API_WG_SET_ASYNC_MODE_REPLY); +} + /* set tup the API message handling tables */ #include static clib_error_t * diff --git a/src/plugins/wireguard/wireguard_cli.c b/src/plugins/wireguard/wireguard_cli.c index f821a3c313a..5e0b7243a83 100644 --- a/src/plugins/wireguard/wireguard_cli.c +++ b/src/plugins/wireguard/wireguard_cli.c @@ -356,6 +356,67 @@ VLIB_CLI_COMMAND (wg_show_itfs_command, static) = .short_help = "show wireguard", .function = wg_show_if_command_fn, }; + +/* CLI handler for "set wireguard async mode on|off". */ +static clib_error_t * +wg_set_async_mode_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + int async_enable = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + async_enable = 1; + else if (unformat (line_input, "off")) + async_enable = 0; + else + { + /* Format the error before freeing: '%U' reads line_input. */ + clib_error_t *error = clib_error_return ( + 0, "unknown input '%U'", format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + wg_set_async_mode (async_enable); + + unformat_free (line_input); + return (NULL); +} + +VLIB_CLI_COMMAND (wg_set_async_mode_command, static) = { + .path = "set wireguard async mode", + .short_help = "set wireguard async mode on|off", + .function = wg_set_async_mode_command_fn, +}; + +static clib_error_t * +wg_show_mode_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_cli_output (vm, "Wireguard mode"); + +#define _(v, f, s) \ + vlib_cli_output (vm, "\t%s: %s", s, \ + (wg_op_mode_is_set_##f () ? 
"enabled" : "disabled")); + foreach_wg_op_mode_flags +#undef _ + + return (NULL); +} + +VLIB_CLI_COMMAND (wg_show_modemode_command, static) = { + .path = "show wireguard mode", + .short_help = "show wireguard mode", + .function = wg_show_mode_command_fn, +}; + /* *INDENT-ON* */ /* diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c index c8605f117cd..6efec28fef8 100644 --- a/src/plugins/wireguard/wireguard_noise.c +++ b/src/plugins/wireguard/wireguard_noise.c @@ -629,58 +629,6 @@ error: return ret; } -enum noise_state_crypt -noise_sync_remote_encrypt (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, - noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce, - uint8_t *src, size_t srclen, uint8_t *dst, u32 bi, - u8 *iv, f64 time) -{ - noise_keypair_t *kp; - enum noise_state_crypt ret = SC_FAILED; - - if ((kp = r->r_current) == NULL) - goto error; - - /* We confirm that our values are within our tolerances. We want: - * - a valid keypair - * - our keypair to be less than REJECT_AFTER_TIME seconds old - * - our receive counter to be less than REJECT_AFTER_MESSAGES - * - our send counter to be less than REJECT_AFTER_MESSAGES - */ - if (!kp->kp_valid || - wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME, - time) || - kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES || - ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES)) - goto error; - - /* We encrypt into the same buffer, so the caller must ensure that buf - * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index - * are passed back out to the caller through the provided data pointer. */ - *r_idx = kp->kp_remote_index; - - wg_prepare_sync_op (vm, crypto_ops, src, srclen, dst, NULL, 0, *nonce, - VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, kp->kp_send_index, - bi, iv); - - /* If our values are still within tolerances, but we are approaching - * the tolerances, we notify the caller with ESTALE that they should - * establish a new keypair. 
The current keypair can continue to be used - * until the tolerances are hit. We notify if: - * - our send counter is valid and not less than REKEY_AFTER_MESSAGES - * - we're the initiator and our keypair is older than - * REKEY_AFTER_TIME seconds */ - ret = SC_KEEP_KEY_FRESH; - if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) || - (kp->kp_is_initiator && wg_birthdate_has_expired_opt ( - kp->kp_birthdate, REKEY_AFTER_TIME, time))) - goto error; - - ret = SC_OK; -error: - return ret; -} - enum noise_state_crypt noise_sync_remote_decrypt (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, noise_remote_t *r, uint32_t r_idx, uint64_t nonce, @@ -791,14 +739,6 @@ noise_remote_handshake_index_drop (noise_remote_t * r) u->u_index_drop (hs->hs_local_index); } -static uint64_t -noise_counter_send (noise_counter_t * ctr) -{ - uint64_t ret; - ret = ctr->c_send++; - return ret; -} - static void noise_kdf (uint8_t * a, uint8_t * b, uint8_t * c, const uint8_t * x, size_t a_len, size_t b_len, size_t c_len, size_t x_len, diff --git a/src/plugins/wireguard/wireguard_noise.h b/src/plugins/wireguard/wireguard_noise.h index ef1e7dcbfca..33ac189b0c0 100644 --- a/src/plugins/wireguard/wireguard_noise.h +++ b/src/plugins/wireguard/wireguard_noise.h @@ -136,6 +136,14 @@ noise_local_get (uint32_t locali) return (pool_elt_at_index (noise_local_pool, locali)); } +static_always_inline uint64_t +noise_counter_send (noise_counter_t *ctr) +{ + uint64_t ret; + ret = ctr->c_send++; + return ret; +} + void noise_local_init (noise_local_t *, struct noise_upcall *); bool noise_local_set_private (noise_local_t *, const uint8_t[NOISE_PUBLIC_KEY_LEN]); @@ -188,12 +196,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t *, uint64_t * nonce, uint8_t * src, size_t srclen, uint8_t * dst); -enum noise_state_crypt -noise_sync_remote_encrypt (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, - noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce, - uint8_t *src, size_t srclen, uint8_t *dst, u32 bi, - u8 
*iv, f64 time); - enum noise_state_crypt noise_sync_remote_decrypt (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, noise_remote_t *, uint32_t r_idx, uint64_t nonce, diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c index 2feb0570a31..f7ae4d174c0 100644 --- a/src/plugins/wireguard/wireguard_output_tun.c +++ b/src/plugins/wireguard/wireguard_output_tun.c @@ -21,11 +21,12 @@ #include #include -#define foreach_wg_output_error \ - _(NONE, "No error") \ - _(PEER, "Peer error") \ - _(KEYPAIR, "Keypair error") \ - _(TOO_BIG, "packet too big") \ +#define foreach_wg_output_error \ + _ (NONE, "No error") \ + _ (PEER, "Peer error") \ + _ (KEYPAIR, "Keypair error") \ + _ (TOO_BIG, "packet too big") \ + _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") typedef enum { @@ -56,6 +57,12 @@ typedef struct u8 is_ip4; } wg_output_tun_trace_t; +typedef struct +{ + index_t peer; + u32 next_index; +} wg_output_tun_post_trace_t; + u8 * format_ip4_udp_header (u8 * s, va_list * args) { @@ -93,6 +100,47 @@ format_wg_output_tun_trace (u8 * s, va_list * args) return s; } +/* post node - packet trace format function */ +static u8 * +format_wg_output_tun_post_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + wg_output_tun_post_trace_t *t = va_arg (*args, wg_output_tun_post_trace_t *); + + s = format (s, "peer: %d\n", t->peer); + s = format (s, " wg-post: next node index %u", t->next_index); + return s; +} + +static_always_inline void +wg_prepare_sync_enc_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, + u8 *src, u32 src_len, u8 *dst, u8 *aad, u32 aad_len, + u64 nonce, vnet_crypto_key_index_t key_index, u32 bi, + u8 *iv) +{ + vnet_crypto_op_t _op, *op = &_op; + u8 src_[] = {}; + + clib_memset (iv, 0, 4); + clib_memcpy (iv + 4, &nonce, sizeof (nonce)); + + vec_add2_aligned (crypto_ops[0], op, 1, 
CLIB_CACHE_LINE_BYTES); + vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC); + + op->tag_len = NOISE_AUTHTAG_LEN; + op->tag = dst + src_len; + op->src = !src ? src_ : src; + op->len = src_len; + op->dst = dst; + op->key_index = key_index; + op->aad = aad; + op->aad_len = aad_len; + op->iv = iv; + op->user_data = bi; +} + static_always_inline void wg_output_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_crypto_op_t *ops, vlib_buffer_t *b[], u16 *nexts, @@ -121,10 +169,148 @@ wg_output_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node, } } +static_always_inline void +wg_output_tun_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f, + u32 key_index, u32 crypto_len, + i16 crypto_start_offset, u32 buffer_index, + u16 next_node, u8 *iv, u8 *tag, u8 flags) +{ + vnet_crypto_async_frame_elt_t *fe; + u16 index; + + ASSERT (f->n_elts < VNET_CRYPTO_FRAME_SIZE); + + index = f->n_elts; + fe = &f->elts[index]; + f->n_elts++; + fe->key_index = key_index; + fe->crypto_total_length = crypto_len; + fe->crypto_start_offset = crypto_start_offset; + fe->iv = iv; + fe->tag = tag; + fe->flags = flags; + f->buffer_indices[index] = buffer_index; + f->next_node_index[index] = next_node; +} + +static_always_inline enum noise_state_crypt +wq_output_tun_process (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops, + noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce, + uint8_t *src, size_t srclen, uint8_t *dst, u32 bi, + u8 *iv, f64 time) +{ + noise_keypair_t *kp; + enum noise_state_crypt ret = SC_FAILED; + + if ((kp = r->r_current) == NULL) + goto error; + + /* We confirm that our values are within our tolerances. 
We want: + * - a valid keypair + * - our keypair to be less than REJECT_AFTER_TIME seconds old + * - our receive counter to be less than REJECT_AFTER_MESSAGES + * - our send counter to be less than REJECT_AFTER_MESSAGES + */ + if (!kp->kp_valid || + wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME, + time) || + kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES || + ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES)) + goto error; + + /* We encrypt into the same buffer, so the caller must ensure that buf + * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index + * are passed back out to the caller through the provided data pointer. */ + *r_idx = kp->kp_remote_index; + + wg_prepare_sync_enc_op (vm, crypto_ops, src, srclen, dst, NULL, 0, *nonce, + kp->kp_send_index, bi, iv); + + /* If our values are still within tolerances, but we are approaching + * the tolerances, we notify the caller with ESTALE that they should + * establish a new keypair. The current keypair can continue to be used + * until the tolerances are hit. 
We notify if: + * - our send counter is valid and not less than REKEY_AFTER_MESSAGES + * - we're the initiator and our keypair is older than + * REKEY_AFTER_TIME seconds */ + ret = SC_KEEP_KEY_FRESH; + if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) || + (kp->kp_is_initiator && wg_birthdate_has_expired_opt ( + kp->kp_birthdate, REKEY_AFTER_TIME, time))) + goto error; + + ret = SC_OK; +error: + return ret; +} + +static_always_inline enum noise_state_crypt +wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd, + vnet_crypto_async_frame_t *async_frame, + vlib_buffer_t *b, u8 *payload, u32 payload_len, u32 bi, + u16 next, u16 async_next, noise_remote_t *r, + uint32_t *r_idx, uint64_t *nonce, u8 *iv, f64 time) +{ + wg_post_data_t *post = wg_post_data (b); + u8 flag = 0; + noise_keypair_t *kp; + + post->next_index = next; + + /* crypto */ + enum noise_state_crypt ret = SC_FAILED; + + if ((kp = r->r_current) == NULL) + goto error; + + /* We confirm that our values are within our tolerances. We want: + * - a valid keypair + * - our keypair to be less than REJECT_AFTER_TIME seconds old + * - our receive counter to be less than REJECT_AFTER_MESSAGES + * - our send counter to be less than REJECT_AFTER_MESSAGES + */ + if (!kp->kp_valid || + wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME, + time) || + kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES || + ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES)) + goto error; + + /* We encrypt into the same buffer, so the caller must ensure that buf + * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index + * are passed back out to the caller through the provided data pointer. 
*/ + *r_idx = kp->kp_remote_index; + + clib_memset (iv, 0, 4); + clib_memcpy (iv + 4, nonce, sizeof (*nonce)); /* size of the u64 counter */ + + /* this always succeeds because we know the frame is not full */ + wg_output_tun_add_to_frame (vm, async_frame, kp->kp_send_index, payload_len, + payload - b->data, bi, async_next, iv, + payload + payload_len, flag); + + /* If our values are still within tolerances, but we are approaching + * the tolerances, we notify the caller with ESTALE that they should + * establish a new keypair. The current keypair can continue to be used + * until the tolerances are hit. We notify if: + * - our send counter is valid and not less than REKEY_AFTER_MESSAGES + * - we're the initiator and our keypair is older than + * REKEY_AFTER_TIME seconds */ + ret = SC_KEEP_KEY_FRESH; + if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) || + (kp->kp_is_initiator && wg_birthdate_has_expired_opt ( + kp->kp_birthdate, REKEY_AFTER_TIME, time))) + goto error; + + ret = SC_OK; +error: + return ret; +} + /* is_ip4 - inner header flag */ always_inline uword wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, u8 is_ip4) + vlib_frame_t *frame, u8 is_ip4, u16 async_next_node) { wg_main_t *wmp = &wg_main; wg_per_thread_data_t *ptd = @@ -140,10 +326,18 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE]; u32 thread_index = vm->thread_index; u16 n_sync = 0; - u16 drop_next = WG_OUTPUT_NEXT_ERROR; + const u16 drop_next = WG_OUTPUT_NEXT_ERROR; + const u8 is_async = wg_op_mode_is_set_ASYNC (); + vnet_crypto_async_frame_t *async_frame = NULL; + u16 n_async = 0; + u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0; + u16 err = !0; + u32 sync_bi[VLIB_FRAME_SIZE]; + u32 noop_bi[VLIB_FRAME_SIZE]; vlib_get_buffers (vm, from, bufs, n_left_from); vec_reset_length (ptd->crypto_ops); + vec_reset_length (ptd->async_frames); wg_peer_t *peer = NULL; u32 adj_index = 0; @@ -169,7 +363,8 @@ 
wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, LOAD); } - next[0] = WG_OUTPUT_NEXT_ERROR; + noop_next[0] = WG_OUTPUT_NEXT_ERROR; + err = WG_OUTPUT_NEXT_ERROR; adj_index = vnet_buffer (b[0])->ip.adj_index[VLIB_TX]; @@ -193,9 +388,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, wg_peer_assign_thread (thread_index)); } - if (PREDICT_TRUE (thread_index != peer->output_thread_index)) + if (PREDICT_FALSE (thread_index != peer->output_thread_index)) { - next[0] = WG_OUTPUT_NEXT_HANDOFF; + noop_next[0] = WG_OUTPUT_NEXT_HANDOFF; + err = WG_OUTPUT_NEXT_HANDOFF; goto next; } @@ -245,12 +441,35 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, last_adj_index = adj_index; } + /* Here we are sure that can send packet to next node */ + next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT; + enum noise_state_crypt state; - state = noise_sync_remote_encrypt ( - vm, crypto_ops, &peer->remote, &message_data_wg->receiver_index, - &message_data_wg->counter, plain_data, plain_data_len, plain_data, - n_sync, iv_data, time); + if (is_async) + { + /* get a frame for this op if we don't yet have one or it's full */ + if (NULL == async_frame || + vnet_crypto_async_frame_is_full (async_frame)) + { + async_frame = vnet_crypto_async_get_frame ( + vm, VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD0_ENC); + /* Save the frame to the list we'll submit at the end */ + vec_add1 (ptd->async_frames, async_frame); + } + state = wg_add_to_async_frame ( + vm, ptd, async_frame, b[0], plain_data, plain_data_len, + from[b - bufs], next[0], async_next_node, &peer->remote, + &message_data_wg->receiver_index, &message_data_wg->counter, + iv_data, time); + } + else + { + state = wq_output_tun_process ( + vm, crypto_ops, &peer->remote, &message_data_wg->receiver_index, + &message_data_wg->counter, plain_data, plain_data_len, plain_data, + n_sync, iv_data, time); + } if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH)) { @@ -261,11 +480,11 @@ wg_output_tun_inline 
(vlib_main_t *vm, vlib_node_runtime_t *node, // TODO: Maybe wrong wg_send_handshake_from_mt (peeri, false); wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false); + noop_next[0] = WG_OUTPUT_NEXT_ERROR; goto out; } - /* Here we are sure that can send packet to next node */ - next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT; + err = WG_OUTPUT_NEXT_INTERFACE_OUTPUT; if (is_ip4_out) { @@ -304,31 +523,214 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } next: - sync_bufs[n_sync] = b[0]; - n_sync += 1; + if (PREDICT_FALSE (err != WG_OUTPUT_NEXT_INTERFACE_OUTPUT)) + { + noop_bi[n_noop] = from[b - bufs]; + n_noop++; + noop_next++; + goto next_left; + } + if (!is_async) + { + sync_bi[n_sync] = from[b - bufs]; + sync_bufs[n_sync] = b[0]; + n_sync += 1; + next += 1; + } + else + { + n_async++; + } + next_left: n_left_from -= 1; - next += 1; b += 1; } - /* wg-output-process-ops */ - wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts, - drop_next); + if (n_sync) + { + /* wg-output-process-ops */ + wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts, + drop_next); + vlib_buffer_enqueue_to_next (vm, node, sync_bi, nexts, n_sync); + } + if (n_async) + { + /* submit all of the open frames */ + vnet_crypto_async_frame_t **async_frame; + + vec_foreach (async_frame, ptd->async_frames) + { + if (PREDICT_FALSE ( + vnet_crypto_async_submit_open_frame (vm, *async_frame) < 0)) + { + u32 n_drop = (*async_frame)->n_elts; + u32 *bi = (*async_frame)->buffer_indices; + u16 index = n_noop; + while (n_drop--) + { + noop_bi[index] = bi[0]; + vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]); + noop_nexts[index] = drop_next; + b->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR]; + bi++; + index++; + } + n_noop += (*async_frame)->n_elts; + + vnet_crypto_async_reset_frame (*async_frame); + vnet_crypto_async_free_frame (vm, *async_frame); + } + } + } + if (n_noop) + { + vlib_buffer_enqueue_to_next (vm, node, noop_bi, noop_nexts, n_noop); + } - 
vlib_buffer_enqueue_to_next (vm, node, from, nexts, n_sync); return frame->n_vectors; } +always_inline uword +wg_output_tun_post (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + + index_t peeri = ~0; + + vlib_get_buffers (vm, from, b, n_left); + + if (n_left >= 4) + { + vlib_prefetch_buffer_header (b[0], LOAD); + vlib_prefetch_buffer_header (b[1], LOAD); + vlib_prefetch_buffer_header (b[2], LOAD); + vlib_prefetch_buffer_header (b[3], LOAD); + } + + while (n_left > 8) + { + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + + next[0] = (wg_post_data (b[0]))->next_index; + next[1] = (wg_post_data (b[1]))->next_index; + next[2] = (wg_post_data (b[2]))->next_index; + next[3] = (wg_post_data (b[3]))->next_index; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + wg_output_tun_post_trace_t *tr = + vlib_add_trace (vm, node, b[0], sizeof (*tr)); + peeri = wg_peer_get_by_adj_index ( + vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); + tr->peer = peeri; + tr->next_index = next[0]; + } + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + { + wg_output_tun_post_trace_t *tr = + vlib_add_trace (vm, node, b[1], sizeof (*tr)); + tr->peer = wg_peer_get_by_adj_index ( + vnet_buffer (b[1])->ip.adj_index[VLIB_TX]); + tr->next_index = next[1]; + } + if (b[2]->flags & VLIB_BUFFER_IS_TRACED) + { + wg_output_tun_post_trace_t *tr = + vlib_add_trace (vm, node, b[2], sizeof (*tr)); + tr->peer = wg_peer_get_by_adj_index ( + vnet_buffer (b[2])->ip.adj_index[VLIB_TX]); + tr->next_index = next[2]; + } + if (b[3]->flags & VLIB_BUFFER_IS_TRACED) + { + wg_output_tun_post_trace_t *tr = + vlib_add_trace (vm, node, b[3], sizeof (*tr)); + 
tr->peer = wg_peer_get_by_adj_index ( + vnet_buffer (b[3])->ip.adj_index[VLIB_TX]); + tr->next_index = next[3]; + } + } + + b += 4; + next += 4; + n_left -= 4; + } + + while (n_left > 0) + { + next[0] = (wg_post_data (b[0]))->next_index; + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + { + wg_output_tun_post_trace_t *tr = + vlib_add_trace (vm, node, b[0], sizeof (*tr)); + tr->peer = wg_peer_get_by_adj_index ( + vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); + tr->next_index = next[0]; + } + + b += 1; + next += 1; + n_left -= 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (wg4_output_tun_post_node) = { + .name = "wg4-output-tun-post-node", + .vector_size = sizeof (u32), + .format_trace = format_wg_output_tun_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .sibling_of = "wg4-output-tun", + .n_errors = ARRAY_LEN (wg_output_error_strings), + .error_strings = wg_output_error_strings, +}; + +VLIB_REGISTER_NODE (wg6_output_tun_post_node) = { + .name = "wg6-output-tun-post-node", + .vector_size = sizeof (u32), + .format_trace = format_wg_output_tun_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .sibling_of = "wg6-output-tun", + .n_errors = ARRAY_LEN (wg_output_error_strings), + .error_strings = wg_output_error_strings, +}; + +VLIB_NODE_FN (wg4_output_tun_post_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return wg_output_tun_post (vm, node, from_frame); +} + +VLIB_NODE_FN (wg6_output_tun_post_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame) +{ + return wg_output_tun_post (vm, node, from_frame); +} + VLIB_NODE_FN (wg4_output_tun_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 1); + return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 1, + wg_encrypt_async_next.wg4_post_next); } 
VLIB_NODE_FN (wg6_output_tun_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 0); + return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 0, + wg_encrypt_async_next.wg6_post_next); } /* *INDENT-OFF* */ diff --git a/src/plugins/wireguard/wireguard_timer.c b/src/plugins/wireguard/wireguard_timer.c index d2ae7ebf93e..b95801122fc 100644 --- a/src/plugins/wireguard/wireguard_timer.c +++ b/src/plugins/wireguard/wireguard_timer.c @@ -73,7 +73,7 @@ start_timer_thread_fn (void *arg) return 0; } -static void +static_always_inline void start_timer_from_mt (u32 peer_idx, u32 timer_id, u32 interval_ticks) { wg_timers_args a = { @@ -204,8 +204,8 @@ wg_expired_zero_key_material (vlib_main_t * vm, wg_peer_t * peer) } } -void -wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer) +inline void +wg_timers_any_authenticated_packet_traversal (wg_peer_t *peer) { if (peer->persistent_keepalive_interval) { @@ -221,7 +221,7 @@ wg_timers_any_authenticated_packet_sent (wg_peer_t * peer) peer->last_sent_packet = vlib_time_now (vlib_get_main ()); } -void +inline void wg_timers_any_authenticated_packet_sent_opt (wg_peer_t *peer, f64 time) { peer->last_sent_packet = time; @@ -259,7 +259,7 @@ wg_timers_data_sent (wg_peer_t * peer) peer->new_handshake_interval_tick); } -void +inline void wg_timers_data_sent_opt (wg_peer_t *peer, f64 time) { peer->new_handshake_interval_tick = @@ -299,7 +299,7 @@ wg_timers_any_authenticated_packet_received (wg_peer_t * peer) peer->last_received_packet = vlib_time_now (vlib_get_main ()); } -void +inline void wg_timers_any_authenticated_packet_received_opt (wg_peer_t *peer, f64 time) { peer->last_received_packet = time; diff --git a/src/plugins/wireguard/wireguard_timer.h b/src/plugins/wireguard/wireguard_timer.h index cc8e123f3a2..9d5c071c86e 100644 --- a/src/plugins/wireguard/wireguard_timer.h +++ b/src/plugins/wireguard/wireguard_timer.h @@ -61,7 +61,7 @@ 
wg_birthdate_has_expired (f64 birthday_seconds, f64 expiration_seconds) return (birthday_seconds + expiration_seconds) < now_seconds; } -static inline bool +static_always_inline bool wg_birthdate_has_expired_opt (f64 birthday_seconds, f64 expiration_seconds, f64 time) { diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h index 73b7f70ec46..eb381187f29 100644 --- a/src/vnet/crypto/crypto.h +++ b/src/vnet/crypto/crypto.h @@ -82,15 +82,16 @@ typedef enum /** async crypto **/ /* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */ -#define foreach_crypto_aead_async_alg \ - _(AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \ - _(AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \ - _(AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \ - _(AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \ - _(AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \ - _(AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \ - _(CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \ - _(CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) +#define foreach_crypto_aead_async_alg \ + _ (AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \ + _ (AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \ + _ (AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \ + _ (AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \ + _ (AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \ + _ (AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \ + _ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \ + _ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \ + _ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0) /* CRYPTO_ID, INTEG_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, DIGEST_LEN */ #define foreach_crypto_link_async_alg \ -- 2.16.6