wireguard: reduce memcopy and prefetch header [01/34001/3]
author: Gabriel Oginski <gabrielx.oginski@intel.com>
Fri, 8 Oct 2021 08:09:45 +0000 (09:09 +0100)
committer: Matthew Smith <mgsmith@netgate.com>
Wed, 3 Nov 2021 16:04:00 +0000 (16:04 +0000)
Originally, the wireguard implementation made a memory copy of the
whole packet during both encryption and decryption.

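This patch removes the unnecessary packet copy in wireguard by
encrypting and decrypting the packet in place. In addition, it
contains some performance improvements, such as prefetching buffer
headers in the output path and removing the unnecessary reader lock
and unlock around encryption and decryption.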

Type: improvement

Signed-off-by: Gabriel Oginski <gabrielx.oginski@intel.com>
Change-Id: I1fe8e54d749e6922465341083b448c842e2b670f

src/plugins/wireguard/wireguard_input.c
src/plugins/wireguard/wireguard_noise.c
src/plugins/wireguard/wireguard_output_tun.c
src/plugins/wireguard/wireguard_peer.c
src/plugins/wireguard/wireguard_peer.h
src/plugins/wireguard/wireguard_send.c

index 4f5bd4d..3e8ae9b 100644 (file)
@@ -366,15 +366,9 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
              goto out;
            }
 
-         u8 *decr_data = wmp->per_thread_data[thread_index].data;
-
-         enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
-                                                                 &peer->remote,
-                                                                 data->receiver_index,
-                                                                 data->counter,
-                                                                 data->encrypted_data,
-                                                                 encr_len,
-                                                                 decr_data);
+         enum noise_state_crypt state_cr = noise_remote_decrypt (
+           vm, &peer->remote, data->receiver_index, data->counter,
+           data->encrypted_data, encr_len, data->encrypted_data);
 
          if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
            {
@@ -392,7 +386,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
              goto out;
            }
 
-         clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
+         vlib_buffer_advance (b[0], sizeof (message_data_t));
          b[0]->current_length = decr_len;
          vnet_buffer_offload_flags_clear (b[0],
                                           VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
index 7b4c019..36de8ae 100644 (file)
@@ -549,7 +549,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
   noise_keypair_t *kp;
   enum noise_state_crypt ret = SC_FAILED;
 
-  clib_rwlock_reader_lock (&r->r_keypair_lock);
   if ((kp = r->r_current) == NULL)
     goto error;
 
@@ -589,7 +588,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
 
   ret = SC_OK;
 error:
-  clib_rwlock_reader_unlock (&r->r_keypair_lock);
   return ret;
 }
 
@@ -600,7 +598,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
 {
   noise_keypair_t *kp;
   enum noise_state_crypt ret = SC_FAILED;
-  clib_rwlock_reader_lock (&r->r_keypair_lock);
 
   if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
     {
@@ -644,7 +641,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
    * data packet can't confirm a session that we are an INITIATOR of. */
   if (kp == r->r_next)
     {
-      clib_rwlock_reader_unlock (&r->r_keypair_lock);
       clib_rwlock_writer_lock (&r->r_keypair_lock);
       if (kp == r->r_next && kp->kp_local_index == r_idx)
        {
@@ -655,11 +651,9 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
 
          ret = SC_CONN_RESET;
          clib_rwlock_writer_unlock (&r->r_keypair_lock);
-         clib_rwlock_reader_lock (&r->r_keypair_lock);
          goto error;
        }
       clib_rwlock_writer_unlock (&r->r_keypair_lock);
-      clib_rwlock_reader_lock (&r->r_keypair_lock);
     }
 
   /* Similar to when we encrypt, we want to notify the caller when we
@@ -676,7 +670,6 @@ noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
 
   ret = SC_OK;
 error:
-  clib_rwlock_reader_unlock (&r->r_keypair_lock);
   return ret;
 }
 
index ec6cb7c..c792d4b 100644 (file)
@@ -100,8 +100,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
 {
   u32 n_left_from;
   u32 *from;
-  ip4_udp_header_t *hdr4_out = NULL;
-  ip6_udp_header_t *hdr6_out = NULL;
+  ip4_udp_wg_header_t *hdr4_out = NULL;
+  ip6_udp_wg_header_t *hdr6_out = NULL;
+  message_data_t *message_data_wg = NULL;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
   u16 nexts[VLIB_FRAME_SIZE], *next;
   u32 thread_index = vm->thread_index;
@@ -113,7 +114,6 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
 
   vlib_get_buffers (vm, from, bufs, n_left_from);
 
-  wg_main_t *wmp = &wg_main;
   wg_peer_t *peer = NULL;
 
   while (n_left_from > 0)
@@ -124,6 +124,14 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       u8 *plain_data;
       u16 plain_data_len;
 
+      if (n_left_from > 2)
+       {
+         u8 *p;
+         vlib_prefetch_buffer_header (b[2], LOAD);
+         p = vlib_buffer_get_current (b[1]);
+         CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+       }
+
       next[0] = WG_OUTPUT_NEXT_ERROR;
       peeri =
        wg_peer_get_by_adj_index (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
@@ -160,10 +168,12 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
       if (is_ip4_out)
        {
          hdr4_out = vlib_buffer_get_current (b[0]);
+         message_data_wg = &hdr4_out->wg;
        }
       else
        {
          hdr6_out = vlib_buffer_get_current (b[0]);
+         message_data_wg = &hdr6_out->wg;
        }
 
       iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
@@ -184,14 +194,11 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          goto out;
        }
 
-      message_data_t *encrypted_packet =
-       (message_data_t *) wmp->per_thread_data[thread_index].data;
-
       enum noise_state_crypt state;
+
       state = noise_remote_encrypt (
-       vm, &peer->remote, &encrypted_packet->receiver_index,
-       &encrypted_packet->counter, plain_data, plain_data_len,
-       encrypted_packet->encrypted_data);
+       vm, &peer->remote, &message_data_wg->receiver_index,
+       &message_data_wg->counter, plain_data, plain_data_len, plain_data);
 
       if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
        {
@@ -207,12 +214,10 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
 
       /* Here we are sure that can send packet to next node */
       next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
-      encrypted_packet->header.type = MESSAGE_DATA;
-
-      clib_memcpy (plain_data, (u8 *) encrypted_packet, encrypted_packet_len);
 
       if (is_ip4_out)
        {
+         hdr4_out->wg.header.type = MESSAGE_DATA;
          hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
                                                       sizeof (udp_header_t));
          b[0]->current_length =
@@ -222,6 +227,7 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
        }
       else
        {
+         hdr6_out->wg.header.type = MESSAGE_DATA;
          hdr6_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
                                                       sizeof (udp_header_t));
          b[0]->current_length =
@@ -244,9 +250,9 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
          t->peer = peeri;
          t->is_ip4 = is_ip4_out;
          if (hdr4_out)
-           clib_memcpy (t->header, hdr4_out, sizeof (*hdr4_out));
+           clib_memcpy (t->header, hdr4_out, sizeof (ip4_udp_header_t));
          else if (hdr6_out)
-           clib_memcpy (t->header, hdr6_out, sizeof (*hdr6_out));
+           clib_memcpy (t->header, hdr6_out, sizeof (ip6_udp_header_t));
        }
 
     next:
index 81cc74a..f5fbc3c 100644 (file)
@@ -103,7 +103,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
     {
       ip4_udp_header_t *hdr;
 
-      vec_validate (rewrite, sizeof (*hdr) - 1);
+      /* reserve space for ip4, udp and wireguard headers */
+      vec_validate (rewrite, sizeof (ip4_udp_wg_header_t) - 1);
       hdr = (ip4_udp_header_t *) rewrite;
 
       hdr->ip4.ip_version_and_header_length = 0x45;
@@ -121,7 +122,8 @@ wg_peer_build_rewrite (const wg_peer_t *peer, u8 is_ip4)
     {
       ip6_udp_header_t *hdr;
 
-      vec_validate (rewrite, sizeof (*hdr) - 1);
+      /* reserve space for ip6, udp and wireguard headers */
+      vec_validate (rewrite, sizeof (ip6_udp_wg_header_t) - 1);
       hdr = (ip6_udp_header_t *) rewrite;
 
       hdr->ip6.ip_version_traffic_class_and_flow_label = 0x60;
index a08fff7..1af5799 100644 (file)
@@ -33,12 +33,26 @@ typedef struct ip4_udp_header_t_
   udp_header_t udp;
 } __clib_packed ip4_udp_header_t;
 
+typedef struct ip4_udp_wg_header_t_
+{
+  ip4_header_t ip4;
+  udp_header_t udp;
+  message_data_t wg;
+} __clib_packed ip4_udp_wg_header_t;
+
 typedef struct ip6_udp_header_t_
 {
   ip6_header_t ip6;
   udp_header_t udp;
 } __clib_packed ip6_udp_header_t;
 
+typedef struct ip6_udp_wg_header_t_
+{
+  ip6_header_t ip6;
+  udp_header_t udp;
+  message_data_t wg;
+} __clib_packed ip6_udp_wg_header_t;
+
 u8 *format_ip4_udp_header (u8 * s, va_list * va);
 u8 *format_ip6_udp_header (u8 *s, va_list *va);
 
index a5f8177..53692f0 100644 (file)
@@ -50,7 +50,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
       vlib_buffer_advance (b0, -sizeof (*hdr4));
 
       hdr4 = vlib_buffer_get_current (b0);
-      clib_memcpy (hdr4, peer->rewrite, vec_len (peer->rewrite));
+
+      /* copy only ip4 and udp header; wireguard header not needed */
+      clib_memcpy (hdr4, peer->rewrite, sizeof (ip4_udp_header_t));
 
       hdr4->udp.length =
        clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
@@ -64,7 +66,9 @@ wg_buffer_prepend_rewrite (vlib_buffer_t *b0, const wg_peer_t *peer, u8 is_ip4)
       vlib_buffer_advance (b0, -sizeof (*hdr6));
 
       hdr6 = vlib_buffer_get_current (b0);
-      clib_memcpy (hdr6, peer->rewrite, vec_len (peer->rewrite));
+
+      /* copy only ip6 and udp header; wireguard header not needed */
+      clib_memcpy (hdr6, peer->rewrite, sizeof (ip6_udp_header_t));
 
       hdr6->udp.length =
        clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));