wireguard: add support for chained buffers 97/38597/3
author: Alexander Chernavin <achernavin@netgate.com>
Wed, 29 Mar 2023 16:09:37 +0000 (16:09 +0000)
committer: Matthew Smith <mgsmith@netgate.com>
Fri, 2 Jun 2023 14:41:53 +0000 (14:41 +0000)
Type: feature

With this change, packets that are too large to fit in a single buffer
can be sent and received over a Wireguard tunnel using chained buffers.
Also, cover this with tests.

Signed-off-by: Alexander Chernavin <achernavin@netgate.com>
Change-Id: Ifaf7325676d728580097bc389b51a9be39e44d88

src/plugins/wireguard/wireguard.h
src/plugins/wireguard/wireguard_input.c
src/plugins/wireguard/wireguard_output_tun.c
test/test_wireguard.py

index 3a6248b..05cefc4 100644 (file)
@@ -31,9 +31,12 @@ typedef struct wg_per_thread_data_t_
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
   vnet_crypto_op_t *crypto_ops;
+  vnet_crypto_op_t *chained_crypto_ops;
+  vnet_crypto_op_chunk_t *chunks;
   vnet_crypto_async_frame_t **async_frames;
   u8 data[WG_DEFAULT_DATA_SIZE];
 } wg_per_thread_data_t;
+
 typedef struct
 {
   /* convenience */
index 6b8c803..db37fa5 100644 (file)
@@ -34,7 +34,7 @@
   _ (HANDSHAKE_RECEIVE, "Failed while receiving Handshake")                   \
   _ (COOKIE_DECRYPTION, "Failed during Cookie decryption")                    \
   _ (COOKIE_SEND, "Failed during sending Cookie")                             \
-  _ (TOO_BIG, "Packet too big")                                               \
+  _ (NO_BUFFERS, "No buffers")                                                \
   _ (UNDEFINED, "Undefined error")                                            \
   _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)")
 
@@ -340,6 +340,7 @@ wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next,
 {
   next[0] = WG_INPUT_NEXT_PUNT;
   noise_keypair_t *kp;
+  vlib_buffer_t *lb;
 
   if ((kp = wg_get_active_keypair (&peer->remote, data->receiver_index)) ==
       NULL)
@@ -350,11 +351,16 @@ wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next,
       return -1;
     }
 
-  u16 encr_len = b->current_length - sizeof (message_data_t);
+  lb = b;
+  /* Find last buffer in the chain */
+  while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+    lb = vlib_get_buffer (vm, lb->next_buffer);
+
+  u16 encr_len = vlib_buffer_length_in_chain (vm, b) - sizeof (message_data_t);
   u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
 
   vlib_buffer_advance (b, sizeof (message_data_t));
-  b->current_length = decr_len;
+  vlib_buffer_chain_increase_length (b, lb, -NOISE_AUTHTAG_LEN);
   vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
 
   /* Keepalive packet has zero length */
@@ -433,9 +439,75 @@ wg_input_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
     }
 }
 
+static_always_inline void
+wg_input_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+                             vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+                             u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+                             u16 drop_next)
+{
+  u32 n_fail, n_ops = vec_len (ops);
+  vnet_crypto_op_t *op = ops;
+
+  if (n_ops == 0)
+    return;
+
+  n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+  while (n_fail)
+    {
+      ASSERT (op - ops < n_ops);
+
+      if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+       {
+         u32 bi = op->user_data;
+         b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+         nexts[bi] = drop_next;
+         n_fail--;
+       }
+      op++;
+    }
+}
+
+static_always_inline void
+wg_input_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+                      vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+                      u32 start_len, u16 *n_ch)
+{
+  vnet_crypto_op_chunk_t *ch;
+  vlib_buffer_t *cb = b;
+  u32 n_chunks = 1;
+
+  vec_add2 (ptd->chunks, ch, 1);
+  ch->len = start_len;
+  ch->src = ch->dst = start;
+  cb = vlib_get_buffer (vm, cb->next_buffer);
+
+  while (1)
+    {
+      vec_add2 (ptd->chunks, ch, 1);
+      n_chunks += 1;
+      if (lb == cb)
+       ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+      else
+       ch->len = cb->current_length;
+
+      ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+      if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+       break;
+
+      cb = vlib_get_buffer (vm, cb->next_buffer);
+    }
+
+  if (n_ch)
+    *n_ch = n_chunks;
+}
+
 always_inline void
-wg_prepare_sync_dec_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
-                       u8 *src, u32 src_len, u8 *dst, u8 *aad, u32 aad_len,
+wg_prepare_sync_dec_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+                       vlib_buffer_t *b, vlib_buffer_t *lb,
+                       vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+                       u8 *dst, u8 *aad, u32 aad_len,
                        vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
 {
   vnet_crypto_op_t _op, *op = &_op;
@@ -445,16 +517,28 @@ wg_prepare_sync_dec_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
   vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC);
 
   op->tag_len = NOISE_AUTHTAG_LEN;
-  op->tag = src + src_len;
-  op->src = !src ? src_ : src;
-  op->len = src_len;
-  op->dst = dst;
+  op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
   op->key_index = key_index;
   op->aad = aad;
   op->aad_len = aad_len;
   op->iv = iv;
   op->user_data = bi;
   op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+
+  if (b != lb)
+    {
+      /* Chained buffers */
+      op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+      op->chunk_index = vec_len (ptd->chunks);
+      wg_input_chain_crypto (vm, ptd, b, lb, src, src_len + NOISE_AUTHTAG_LEN,
+                            &op->n_chunks);
+    }
+  else
+    {
+      op->src = !src ? src_ : src;
+      op->len = src_len;
+      op->dst = dst;
+    }
 }
 
 static_always_inline void
@@ -485,10 +569,10 @@ static_always_inline enum noise_state_crypt
 wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
                  vnet_crypto_op_t **crypto_ops,
                  vnet_crypto_async_frame_t **async_frame, vlib_buffer_t *b,
-                 u32 buf_idx, noise_remote_t *r, uint32_t r_idx,
-                 uint64_t nonce, uint8_t *src, size_t srclen, uint8_t *dst,
-                 u32 from_idx, u8 *iv, f64 time, u8 is_async,
-                 u16 async_next_node)
+                 vlib_buffer_t *lb, u32 buf_idx, noise_remote_t *r,
+                 uint32_t r_idx, uint64_t nonce, uint8_t *src, size_t srclen,
+                 size_t srclen_total, uint8_t *dst, u32 from_idx, u8 *iv,
+                 f64 time, u8 is_async, u16 async_next_node)
 {
   noise_keypair_t *kp;
   enum noise_state_crypt ret = SC_FAILED;
@@ -516,6 +600,12 @@ wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
 
   if (is_async)
     {
+      u8 flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+      u8 *tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
+      if (b != lb)
+       flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
       if (NULL == *async_frame ||
          vnet_crypto_async_frame_is_full (*async_frame))
        {
@@ -525,14 +615,14 @@ wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
          vec_add1 (ptd->async_frames, *async_frame);
        }
 
-      wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen,
-                            src - b->data, buf_idx, async_next_node, iv,
-                            src + srclen, VNET_CRYPTO_OP_FLAG_HMAC_CHECK);
+      wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen_total,
+                            src - b->data, buf_idx, async_next_node, iv, tag,
+                            flags);
     }
   else
     {
-      wg_prepare_sync_dec_op (vm, crypto_ops, src, srclen, dst, NULL, 0,
-                             kp->kp_recv_index, from_idx, iv);
+      wg_prepare_sync_dec_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst,
+                             NULL, 0, kp->kp_recv_index, from_idx, iv);
     }
 
   /* If we've received the handshake confirming data packet then move the
@@ -605,8 +695,9 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
   u32 n_left_from = frame->n_vectors;
 
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  vlib_buffer_t *lb;
   u32 thread_index = vm->thread_index;
-  vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
+  vnet_crypto_op_t **crypto_ops;
   const u16 drop_next = WG_INPUT_NEXT_PUNT;
   message_type_t header_type;
   vlib_buffer_t *data_bufs[VLIB_FRAME_SIZE];
@@ -620,6 +711,8 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
 
   vlib_get_buffers (vm, from, bufs, n_left_from);
   vec_reset_length (ptd->crypto_ops);
+  vec_reset_length (ptd->chained_crypto_ops);
+  vec_reset_length (ptd->chunks);
   vec_reset_length (ptd->async_frames);
 
   f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
@@ -655,6 +748,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          message_data_t *data = vlib_buffer_get_current (b[0]);
          u8 *iv_data = b[0]->pre_data;
          u32 buf_idx = from[b - bufs];
+         u32 n_bufs;
          peer_idx = wg_index_table_lookup (&wmp->index_table,
                                            data->receiver_index);
 
@@ -701,21 +795,63 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
              goto next;
            }
 
-         u16 encr_len = b[0]->current_length - sizeof (message_data_t);
-         u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
-         if (PREDICT_FALSE (decr_len >= WG_DEFAULT_DATA_SIZE))
+         lb = b[0];
+         n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+         if (n_bufs == 0)
            {
-             b[0]->error = node->errors[WG_INPUT_ERROR_TOO_BIG];
+             other_next[n_other] = WG_INPUT_NEXT_ERROR;
+             b[0]->error = node->errors[WG_INPUT_ERROR_NO_BUFFERS];
              other_bi[n_other] = buf_idx;
              n_other += 1;
              goto out;
            }
 
-         enum noise_state_crypt state_cr = wg_input_process (
-           vm, ptd, crypto_ops, &async_frame, b[0], buf_idx, &peer->remote,
-           data->receiver_index, data->counter, data->encrypted_data,
-           decr_len, data->encrypted_data, n_data, iv_data, time, is_async,
-           async_next_node);
+         if (n_bufs > 1)
+           {
+             vlib_buffer_t *before_last = b[0];
+
+             /* Find last and before last buffer in the chain */
+             while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+               {
+                 before_last = lb;
+                 lb = vlib_get_buffer (vm, lb->next_buffer);
+               }
+
+             /* Ensure auth tag is contiguous and not split across the last
+              * two buffers */
+             if (PREDICT_FALSE (lb->current_length < NOISE_AUTHTAG_LEN))
+               {
+                 u32 len_diff = NOISE_AUTHTAG_LEN - lb->current_length;
+
+                 before_last->current_length -= len_diff;
+                 if (before_last == b[0])
+                   before_last->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+                 vlib_buffer_advance (lb, (signed) -len_diff);
+
+                 clib_memcpy_fast (vlib_buffer_get_current (lb),
+                                   vlib_buffer_get_tail (before_last),
+                                   len_diff);
+               }
+           }
+
+         u16 encr_len = b[0]->current_length - sizeof (message_data_t);
+         u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+         u16 encr_len_total =
+           vlib_buffer_length_in_chain (vm, b[0]) - sizeof (message_data_t);
+         u16 decr_len_total = encr_len_total - NOISE_AUTHTAG_LEN;
+
+         if (lb != b[0])
+           crypto_ops = &ptd->chained_crypto_ops;
+         else
+           crypto_ops = &ptd->crypto_ops;
+
+         enum noise_state_crypt state_cr =
+           wg_input_process (vm, ptd, crypto_ops, &async_frame, b[0], lb,
+                             buf_idx, &peer->remote, data->receiver_index,
+                             data->counter, data->encrypted_data, decr_len,
+                             decr_len_total, data->encrypted_data, n_data,
+                             iv_data, time, is_async, async_next_node);
 
          if (PREDICT_FALSE (state_cr == SC_FAILED))
            {
@@ -796,6 +932,8 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
   /* decrypt packets */
   wg_input_process_ops (vm, node, ptd->crypto_ops, data_bufs, data_nexts,
                        drop_next);
+  wg_input_process_chained_ops (vm, node, ptd->chained_crypto_ops, data_bufs,
+                               data_nexts, ptd->chunks, drop_next);
 
   /* process after decryption */
   b = data_bufs;
index 4d85a59..4ff1621 100644 (file)
@@ -25,7 +25,7 @@
   _ (NONE, "No error")                                                        \
   _ (PEER, "Peer error")                                                      \
   _ (KEYPAIR, "Keypair error")                                                \
-  _ (TOO_BIG, "packet too big")                                               \
+  _ (NO_BUFFERS, "No buffers")                                                \
   _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)")
 
 typedef enum
@@ -115,10 +115,46 @@ format_wg_output_tun_post_trace (u8 *s, va_list *args)
 }
 
 static_always_inline void
-wg_prepare_sync_enc_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
-                       u8 *src, u32 src_len, u8 *dst, u8 *aad, u32 aad_len,
-                       u64 nonce, vnet_crypto_key_index_t key_index, u32 bi,
-                       u8 *iv)
+wg_output_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+                       vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+                       u32 start_len, u16 *n_ch)
+{
+  vnet_crypto_op_chunk_t *ch;
+  vlib_buffer_t *cb = b;
+  u32 n_chunks = 1;
+
+  vec_add2 (ptd->chunks, ch, 1);
+  ch->len = start_len;
+  ch->src = ch->dst = start;
+  cb = vlib_get_buffer (vm, cb->next_buffer);
+
+  while (1)
+    {
+      vec_add2 (ptd->chunks, ch, 1);
+      n_chunks += 1;
+      if (lb == cb)
+       ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+      else
+       ch->len = cb->current_length;
+
+      ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+      if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+       break;
+
+      cb = vlib_get_buffer (vm, cb->next_buffer);
+    }
+
+  if (n_ch)
+    *n_ch = n_chunks;
+}
+
+static_always_inline void
+wg_prepare_sync_enc_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+                       vlib_buffer_t *b, vlib_buffer_t *lb,
+                       vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+                       u8 *dst, u8 *aad, u32 aad_len, u64 nonce,
+                       vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
 {
   vnet_crypto_op_t _op, *op = &_op;
   u8 src_[] = {};
@@ -130,15 +166,55 @@ wg_prepare_sync_enc_op (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
   vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC);
 
   op->tag_len = NOISE_AUTHTAG_LEN;
-  op->tag = dst + src_len;
-  op->src = !src ? src_ : src;
-  op->len = src_len;
-  op->dst = dst;
+  op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
   op->key_index = key_index;
   op->aad = aad;
   op->aad_len = aad_len;
   op->iv = iv;
   op->user_data = bi;
+
+  if (b != lb)
+    {
+      /* Chained buffers */
+      op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+      op->chunk_index = vec_len (ptd->chunks);
+      wg_output_chain_crypto (vm, ptd, b, lb, src, src_len, &op->n_chunks);
+    }
+  else
+    {
+      op->src = !src ? src_ : src;
+      op->len = src_len;
+      op->dst = dst;
+    }
+}
+
+static_always_inline void
+wg_output_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+                              vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+                              u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+                              u16 drop_next)
+{
+  u32 n_fail, n_ops = vec_len (ops);
+  vnet_crypto_op_t *op = ops;
+
+  if (n_ops == 0)
+    return;
+
+  n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+  while (n_fail)
+    {
+      ASSERT (op - ops < n_ops);
+
+      if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+       {
+         u32 bi = op->user_data;
+         b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+         nexts[bi] = drop_next;
+         n_fail--;
+       }
+      op++;
+    }
 }
 
 static_always_inline void
@@ -194,10 +270,11 @@ wg_output_tun_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
 }
 
 static_always_inline enum noise_state_crypt
-wq_output_tun_process (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
-                      noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce,
-                      uint8_t *src, size_t srclen, uint8_t *dst, u32 bi,
-                      u8 *iv, f64 time)
+wg_output_tun_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+                      vlib_buffer_t *b, vlib_buffer_t *lb,
+                      vnet_crypto_op_t **crypto_ops, noise_remote_t *r,
+                      uint32_t *r_idx, uint64_t *nonce, uint8_t *src,
+                      size_t srclen, uint8_t *dst, u32 bi, u8 *iv, f64 time)
 {
   noise_keypair_t *kp;
   enum noise_state_crypt ret = SC_FAILED;
@@ -223,8 +300,8 @@ wq_output_tun_process (vlib_main_t *vm, vnet_crypto_op_t **crypto_ops,
    * are passed back out to the caller through the provided data pointer. */
   *r_idx = kp->kp_remote_index;
 
-  wg_prepare_sync_enc_op (vm, crypto_ops, src, srclen, dst, NULL, 0, *nonce,
-                         kp->kp_send_index, bi, iv);
+  wg_prepare_sync_enc_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst, NULL,
+                         0, *nonce, kp->kp_send_index, bi, iv);
 
   /* If our values are still within tolerances, but we are approaching
    * the tolerances, we notify the caller with ESTALE that they should
@@ -247,12 +324,14 @@ error:
 static_always_inline enum noise_state_crypt
 wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd,
                       vnet_crypto_async_frame_t **async_frame,
-                      vlib_buffer_t *b, u8 *payload, u32 payload_len, u32 bi,
-                      u16 next, u16 async_next, noise_remote_t *r,
-                      uint32_t *r_idx, uint64_t *nonce, u8 *iv, f64 time)
+                      vlib_buffer_t *b, vlib_buffer_t *lb, u8 *payload,
+                      u32 payload_len, u32 bi, u16 next, u16 async_next,
+                      noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce,
+                      u8 *iv, f64 time)
 {
   wg_post_data_t *post = wg_post_data (b);
   u8 flag = 0;
+  u8 *tag;
   noise_keypair_t *kp;
 
   post->next_index = next;
@@ -293,10 +372,15 @@ wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd,
       vec_add1 (ptd->async_frames, *async_frame);
     }
 
+  if (b != lb)
+    flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
+  tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
   /* this always succeeds because we know the frame is not full */
   wg_output_tun_add_to_frame (vm, *async_frame, kp->kp_send_index, payload_len,
-                             payload - b->data, bi, async_next, iv,
-                             payload + payload_len, flag);
+                             payload - b->data, bi, async_next, iv, tag,
+                             flag);
 
   /* If our values are still within tolerances, but we are approaching
    * the tolerances, we notify the caller with ESTALE that they should
@@ -346,7 +430,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
   ip6_udp_wg_header_t *hdr6_out = NULL;
   message_data_t *message_data_wg = NULL;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-  vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
+  vlib_buffer_t *lb;
+  vnet_crypto_op_t **crypto_ops;
   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
   vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
   u32 thread_index = vm->thread_index;
@@ -362,6 +447,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
 
   vlib_get_buffers (vm, from, bufs, n_left_from);
   vec_reset_length (ptd->crypto_ops);
+  vec_reset_length (ptd->chained_crypto_ops);
+  vec_reset_length (ptd->chunks);
   vec_reset_length (ptd->async_frames);
 
   wg_peer_t *peer = NULL;
@@ -377,6 +464,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       u8 is_ip4_out = 1;
       u8 *plain_data;
       u16 plain_data_len;
+      u16 plain_data_len_total;
+      u16 n_bufs;
+      u16 b_space_left_at_beginning;
+      u32 bi = from[b - bufs];
 
       if (n_left_from > 2)
        {
@@ -432,34 +523,72 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          goto out;
        }
 
-      iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
-      plain_data_len = vlib_buffer_length_in_chain (vm, b[0]) - iph_offset;
-      u8 *iv_data = b[0]->pre_data;
+      lb = b[0];
+      n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+      if (n_bufs == 0)
+       {
+         b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+         goto out;
+       }
 
-      size_t encrypted_packet_len = message_data_len (plain_data_len);
+      if (n_bufs > 1)
+       {
+         /* Find last buffer in the chain */
+         while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+           lb = vlib_get_buffer (vm, lb->next_buffer);
+       }
 
-      /*
-       * Ensure there is enough space to write the encrypted data
-       * into the packet
+      /* Ensure there is enough free space at the beginning of the first buffer
+       * to write ethernet header (e.g. IPv6 VxLAN over IPv6 Wireguard will
+       * trigger this)
        */
-      if (PREDICT_FALSE (encrypted_packet_len >= WG_DEFAULT_DATA_SIZE) ||
-         PREDICT_FALSE ((iph_offset + encrypted_packet_len) >=
-                        vlib_buffer_get_default_data_size (vm)))
+      ASSERT ((signed) b[0]->current_data >=
+             (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+      b_space_left_at_beginning =
+       b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE;
+      if (PREDICT_FALSE (b_space_left_at_beginning <
+                        sizeof (ethernet_header_t)))
        {
-         b[0]->error = node->errors[WG_OUTPUT_ERROR_TOO_BIG];
-         goto out;
+         u32 size_diff =
+           sizeof (ethernet_header_t) - b_space_left_at_beginning;
+
+         /* Can only move buffer when it's single and has enough free space*/
+         if (lb == b[0] &&
+             vlib_buffer_space_left_at_end (vm, b[0]) >= size_diff)
+           {
+             vlib_buffer_move (vm, b[0],
+                               b[0]->current_data + (signed) size_diff);
+           }
+         else
+           {
+             b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+             goto out;
+           }
        }
 
       /*
-       * Move the buffer to fit ethernet header
-       */
-      if (b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE <
-         sizeof (ethernet_header_t))
+       * Ensure there is enough free space at the end of the last buffer to
+       * write auth tag */
+      if (PREDICT_FALSE (vlib_buffer_space_left_at_end (vm, lb) <
+                        NOISE_AUTHTAG_LEN))
        {
-         vlib_buffer_move (vm, b[0], 0);
+         u32 tmp_bi = 0;
+         if (vlib_buffer_alloc (vm, &tmp_bi, 1) != 1)
+           {
+             b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+             goto out;
+           }
+         lb = vlib_buffer_chain_buffer (vm, lb, tmp_bi);
        }
 
+      iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
       plain_data = vlib_buffer_get_current (b[0]) + iph_offset;
+      plain_data_len = b[0]->current_length - iph_offset;
+      plain_data_len_total =
+       vlib_buffer_length_in_chain (vm, b[0]) - iph_offset;
+      size_t encrypted_packet_len = message_data_len (plain_data_len_total);
+      vlib_buffer_chain_increase_length (b[0], lb, NOISE_AUTHTAG_LEN);
+      u8 *iv_data = b[0]->pre_data;
 
       is_ip4_out = ip46_address_is_ip4 (&peer->src.addr);
       if (is_ip4_out)
@@ -484,22 +613,27 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       /* Here we are sure that can send packet to next node */
       next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
 
+      if (lb != b[0])
+       crypto_ops = &ptd->chained_crypto_ops;
+      else
+       crypto_ops = &ptd->crypto_ops;
+
       enum noise_state_crypt state;
 
       if (is_async)
        {
          state = wg_add_to_async_frame (
-           vm, ptd, &async_frame, b[0], plain_data, plain_data_len,
-           from[b - bufs], next[0], async_next_node, &peer->remote,
+           vm, ptd, &async_frame, b[0], lb, plain_data, plain_data_len_total,
+           bi, next[0], async_next_node, &peer->remote,
            &message_data_wg->receiver_index, &message_data_wg->counter,
            iv_data, time);
        }
       else
        {
-         state = wq_output_tun_process (
-           vm, crypto_ops, &peer->remote, &message_data_wg->receiver_index,
-           &message_data_wg->counter, plain_data, plain_data_len, plain_data,
-           n_sync, iv_data, time);
+         state = wg_output_tun_process (
+           vm, ptd, b[0], lb, crypto_ops, &peer->remote,
+           &message_data_wg->receiver_index, &message_data_wg->counter,
+           plain_data, plain_data_len, plain_data, n_sync, iv_data, time);
        }
 
       if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
@@ -522,10 +656,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          hdr4_out->wg.header.type = MESSAGE_DATA;
          hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
                                                       sizeof (udp_header_t));
-         b[0]->current_length =
-           (encrypted_packet_len + sizeof (ip4_udp_header_t));
          ip4_header_set_len_w_chksum (
-           &hdr4_out->ip4, clib_host_to_net_u16 (b[0]->current_length));
+           &hdr4_out->ip4, clib_host_to_net_u16 (encrypted_packet_len +
+                                                 sizeof (ip4_udp_header_t)));
        }
       else
        {
@@ -533,8 +666,6 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          hdr6_out->ip6.payload_length = hdr6_out->udp.length =
            clib_host_to_net_u16 (encrypted_packet_len +
                                  sizeof (udp_header_t));
-         b[0]->current_length =
-           (encrypted_packet_len + sizeof (ip6_udp_header_t));
        }
 
     out:
@@ -555,14 +686,14 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
     next:
       if (PREDICT_FALSE (err != WG_OUTPUT_NEXT_INTERFACE_OUTPUT))
        {
-         noop_bi[n_noop] = from[b - bufs];
+         noop_bi[n_noop] = bi;
          n_noop++;
          noop_next++;
          goto next_left;
        }
       if (!is_async)
        {
-         sync_bi[n_sync] = from[b - bufs];
+         sync_bi[n_sync] = bi;
          sync_bufs[n_sync] = b[0];
          n_sync += 1;
          next += 1;
@@ -581,6 +712,8 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       /* wg-output-process-ops */
       wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts,
                             drop_next);
+      wg_output_process_chained_ops (vm, node, ptd->chained_crypto_ops,
+                                    sync_bufs, nexts, ptd->chunks, drop_next);
 
       int n_left_from_sync_bufs = n_sync;
       while (n_left_from_sync_bufs > 0)
index b9713f6..e63508a 100644 (file)
@@ -11,6 +11,7 @@ from scapy.packet import Raw
 from scapy.layers.l2 import Ether, ARP
 from scapy.layers.inet import IP, UDP
 from scapy.layers.inet6 import IPv6
+from scapy.layers.vxlan import VXLAN
 from scapy.contrib.wireguard import (
     Wireguard,
     WireguardResponse,
@@ -40,6 +41,8 @@ from vpp_ipip_tun_interface import VppIpIpTunInterface
 from vpp_interface import VppInterface
 from vpp_pg_interface import is_ipv6_misc
 from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_l2 import VppBridgeDomain, VppBridgeDomainPort
+from vpp_vxlan_tunnel import VppVxlanTunnel
 from vpp_object import VppObject
 from vpp_papi import VppEnum
 from framework import is_distro_ubuntu2204, is_distro_debian11, tag_fixme_vpp_debug
@@ -470,6 +473,7 @@ class VppWgPeer(VppObject):
         return self.noise.encrypt(bytes(p))
 
     def validate_encapped(self, rxs, tx, is_tunnel_ip6=False, is_transport_ip6=False):
+        ret_rxs = []
         for rx in rxs:
             rx = self.decrypt_transport(rx, is_tunnel_ip6)
             if is_transport_ip6 is False:
@@ -482,6 +486,8 @@ class VppWgPeer(VppObject):
                 # check the original packet is present
                 self._test.assertEqual(rx[IPv6].dst, tx[IPv6].dst)
                 self._test.assertEqual(rx[IPv6].hlim, tx[IPv6].hlim - 1)
+            ret_rxs.append(rx)
+        return ret_rxs
 
     def want_events(self):
         self._test.vapi.want_wireguard_peer_events(
@@ -2510,6 +2516,227 @@ class TestWg(VppTestCase):
         peer_1.remove_vpp_config()
         wg0.remove_vpp_config()
 
+    def _test_wg_large_packet_tmpl(self, is_async, is_ip6):
+        self.vapi.wg_set_async_mode(is_async)
+        port = 12323
+
+        # create wg interface
+        if is_ip6:
+            wg0 = VppWgInterface(self, self.pg1.local_ip6, port).add_vpp_config()
+            wg0.admin_up()
+            wg0.config_ip6()
+        else:
+            wg0 = VppWgInterface(self, self.pg1.local_ip4, port).add_vpp_config()
+            wg0.admin_up()
+            wg0.config_ip4()
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # create a peer
+        if is_ip6:
+            peer_1 = VppWgPeer(
+                self, wg0, self.pg1.remote_ip6, port + 1, ["1::3:0/112"]
+            ).add_vpp_config()
+        else:
+            peer_1 = VppWgPeer(
+                self, wg0, self.pg1.remote_ip4, port + 1, ["10.11.3.0/24"]
+            ).add_vpp_config()
+        self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1)
+
+        # create a route to rewrite traffic into the wg interface
+        if is_ip6:
+            r1 = VppIpRoute(
+                self, "1::3:0", 112, [VppRoutePath("1::3:1", wg0.sw_if_index)]
+            ).add_vpp_config()
+        else:
+            r1 = VppIpRoute(
+                self, "10.11.3.0", 24, [VppRoutePath("10.11.3.1", wg0.sw_if_index)]
+            ).add_vpp_config()
+
+        # wait for the peer to send a handshake initiation
+        rxs = self.pg1.get_capture(1, timeout=2)
+
+        # prepare and send a handshake response
+        # expect a keepalive message
+        resp = peer_1.consume_init(rxs[0], self.pg1, is_ip6=is_ip6)
+        rxs = self.send_and_expect(self.pg1, [resp], self.pg1)
+
+        # verify the keepalive message
+        b = peer_1.decrypt_transport(rxs[0], is_ip6=is_ip6)
+        self.assertEqual(0, len(b))
+
+        # prepare and send data packets
+        # expect to receive them decrypted
+        if is_ip6:
+            ip_header = IPv6(src="1::3:1", dst=self.pg0.remote_ip6, hlim=20)
+        else:
+            ip_header = IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20)
+        packet_len_opts = (
+            2500,  # two buffers
+            1500,  # one buffer
+            4500,  # three buffers
+            1910 if is_ip6 else 1950,  # auth tag is not contiguous
+        )
+        txs = []
+        for l in packet_len_opts:
+            txs.append(
+                peer_1.mk_tunnel_header(self.pg1, is_ip6=is_ip6)
+                / Wireguard(message_type=4, reserved_zero=0)
+                / WireguardTransport(
+                    receiver_index=peer_1.sender,
+                    counter=len(txs),
+                    encrypted_encapsulated_packet=peer_1.encrypt_transport(
+                        ip_header / UDP(sport=222, dport=223) / Raw(b"\xfe" * l)
+                    ),
+                )
+            )
+        rxs = self.send_and_expect(self.pg1, txs, self.pg0)
+
+        # verify decrypted packets
+        for i, l in enumerate(packet_len_opts):
+            if is_ip6:
+                self.assertEqual(rxs[i][IPv6].dst, self.pg0.remote_ip6)
+                self.assertEqual(rxs[i][IPv6].hlim, ip_header.hlim - 1)
+            else:
+                self.assertEqual(rxs[i][IP].dst, self.pg0.remote_ip4)
+                self.assertEqual(rxs[i][IP].ttl, ip_header.ttl - 1)
+            self.assertEqual(len(rxs[i][Raw]), l)
+            self.assertEqual(bytes(rxs[i][Raw]), b"\xfe" * l)
+
+        # prepare and send packets that will be rewritten into the wg interface
+        # expect data packets sent
+        if is_ip6:
+            ip_header = IPv6(src=self.pg0.remote_ip6, dst="1::3:2")
+        else:
+            ip_header = IP(src=self.pg0.remote_ip4, dst="10.11.3.2")
+        packet_len_opts = (
+            2500,  # two buffers
+            1500,  # one buffer
+            4500,  # three buffers
+            1980 if is_ip6 else 2000,  # no free space to write auth tag
+        )
+        txs = []
+        for l in packet_len_opts:
+            txs.append(
+                Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac)
+                / ip_header
+                / UDP(sport=555, dport=556)
+                / Raw(b"\xfe" * l)
+            )
+        rxs = self.send_and_expect(self.pg0, txs, self.pg1)
+
+        # verify the data packets
+        rxs_decrypted = peer_1.validate_encapped(
+            rxs, ip_header, is_tunnel_ip6=is_ip6, is_transport_ip6=is_ip6
+        )
+
+        for i, l in enumerate(packet_len_opts):
+            self.assertEqual(len(rxs_decrypted[i][Raw]), l)
+            self.assertEqual(bytes(rxs_decrypted[i][Raw]), b"\xfe" * l)
+
+        # remove configs
+        r1.remove_vpp_config()
+        peer_1.remove_vpp_config()
+        wg0.remove_vpp_config()
+
+    def test_wg_large_packet_v4_sync(self):
+        """Large packet (v4, sync)"""
+        self._test_wg_large_packet_tmpl(is_async=False, is_ip6=False)
+
+    def test_wg_large_packet_v6_sync(self):
+        """Large packet (v6, sync)"""
+        self._test_wg_large_packet_tmpl(is_async=False, is_ip6=True)
+
+    def test_wg_large_packet_v4_async(self):
+        """Large packet (v4, async)"""
+        self._test_wg_large_packet_tmpl(is_async=True, is_ip6=False)
+
+    def test_wg_large_packet_v6_async(self):
+        """Large packet (v6, async)"""
+        self._test_wg_large_packet_tmpl(is_async=True, is_ip6=True)
+
+    def test_wg_lack_of_buf_headroom(self):
+        """Lack of buffer's headroom (v6 vxlan over v6 wg)"""
+        port = 12323
+
+        # create wg interface
+        wg0 = VppWgInterface(self, self.pg1.local_ip6, port).add_vpp_config()
+        wg0.admin_up()
+        wg0.config_ip6()
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        # create a peer
+        peer_1 = VppWgPeer(
+            self, wg0, self.pg1.remote_ip6, port + 1, ["::/0"]
+        ).add_vpp_config()
+        self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1)
+
+        # create a route to enable communication between wg interface addresses
+        r1 = VppIpRoute(
+            self, wg0.remote_ip6, 128, [VppRoutePath("0.0.0.0", wg0.sw_if_index)]
+        ).add_vpp_config()
+
+        # wait for the peer to send a handshake initiation
+        rxs = self.pg1.get_capture(1, timeout=2)
+
+        # prepare and send a handshake response
+        # expect a keepalive message
+        resp = peer_1.consume_init(rxs[0], self.pg1, is_ip6=True)
+        rxs = self.send_and_expect(self.pg1, [resp], self.pg1)
+
+        # verify the keepalive message
+        b = peer_1.decrypt_transport(rxs[0], is_ip6=True)
+        self.assertEqual(0, len(b))
+
+        # create vxlan interface over the wg interface
+        vxlan0 = VppVxlanTunnel(self, src=wg0.local_ip6, dst=wg0.remote_ip6, vni=1111)
+        vxlan0.add_vpp_config()
+
+        # create bridge domain
+        bd1 = VppBridgeDomain(self, bd_id=1)
+        bd1.add_vpp_config()
+
+        # add the vxlan interface and pg0 to the bridge domain
+        bd1_ports = (
+            VppBridgeDomainPort(self, bd1, vxlan0).add_vpp_config(),
+            VppBridgeDomainPort(self, bd1, self.pg0).add_vpp_config(),
+        )
+
+        # prepare and send packets that will be rewritten into the vxlan interface
+        # expect them to be rewritten into the wg interface and sent as data packets
+        tx = (
+            Ether(dst="00:00:00:00:00:01", src="00:00:00:00:00:02")
+            / IPv6(src="::1", dst="::2", hlim=20)
+            / UDP(sport=1111, dport=1112)
+            / Raw(b"\xfe" * 1900)
+        )
+        rxs = self.send_and_expect(self.pg0, [tx] * 5, self.pg1)
+
+        # verify the data packets
+        for rx in rxs:
+            rx_decrypted = IPv6(peer_1.decrypt_transport(rx, is_ip6=True))
+
+            self.assertEqual(rx_decrypted[VXLAN].vni, vxlan0.vni)
+            inner = rx_decrypted[VXLAN].payload
+
+            # check the original packet is present
+            self.assertEqual(inner[IPv6].dst, tx[IPv6].dst)
+            self.assertEqual(inner[IPv6].hlim, tx[IPv6].hlim)
+            self.assertEqual(len(inner[Raw]), len(tx[Raw]))
+            self.assertEqual(bytes(inner[Raw]), bytes(tx[Raw]))
+
+        # remove configs
+        for bdp in bd1_ports:
+            bdp.remove_vpp_config()
+        bd1.remove_vpp_config()
+        vxlan0.remove_vpp_config()
+        r1.remove_vpp_config()
+        peer_1.remove_vpp_config()
+        wg0.remove_vpp_config()
+
 
 @tag_fixme_vpp_debug
 class WireguardHandoffTests(TestWg):