wireguard: add handoff node
[vpp.git] / src / plugins / wireguard / wireguard_input.c
index cdd65f8..b15c265 100755 (executable)
@@ -30,6 +30,7 @@
   _(DECRYPTION, "Failed during decryption")             \
   _(KEEPALIVE_SEND, "Failed while sending Keepalive")   \
   _(HANDSHAKE_SEND, "Failed while sending Handshake")   \
+  _(TOO_BIG, "Packet too big")                          \
   _(UNDEFINED, "Undefined error")
 
 typedef enum
@@ -51,7 +52,7 @@ typedef struct
   message_type_t type;
   u16 current_length;
   bool is_keepalive;
-
+  index_t peer;
 } wg_input_trace_t;
 
 u8 *
@@ -79,6 +80,7 @@ format_wg_input_trace (u8 * s, va_list * args)
 
   s = format (s, "WG input: \n");
   s = format (s, "  Type: %U\n", format_wg_message_type, t->type);
+  s = format (s, "  peer: %d\n", t->peer);
   s = format (s, "  Length: %d\n", t->current_length);
   s = format (s, "  Keepalive: %s", t->is_keepalive ? "true" : "false");
 
@@ -87,6 +89,8 @@ format_wg_input_trace (u8 * s, va_list * args)
 
 typedef enum
 {
+  WG_INPUT_NEXT_HANDOFF_HANDSHAKE,
+  WG_INPUT_NEXT_HANDOFF_DATA,
   WG_INPUT_NEXT_IP4_INPUT,
   WG_INPUT_NEXT_PUNT,
   WG_INPUT_NEXT_ERROR,
@@ -106,6 +110,8 @@ typedef enum
 static wg_input_error_t
 wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
 {
+  ASSERT (vm->thread_index == 0);
+
   enum cookie_mac_state mac_state;
   bool packet_needs_cookie;
   bool under_load;
@@ -129,17 +135,15 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
   if (NULL == wg_if)
     return WG_INPUT_ERROR_INTERFACE;
 
-  if (header->type == MESSAGE_HANDSHAKE_COOKIE)
+  if (PREDICT_FALSE (header->type == MESSAGE_HANDSHAKE_COOKIE))
     {
       message_handshake_cookie_t *packet =
        (message_handshake_cookie_t *) current_b_data;
       u32 *entry =
        wg_index_table_lookup (&wmp->index_table, packet->receiver_index);
       if (entry)
-       {
-         peer = pool_elt_at_index (wmp->peers, *entry);
-       }
-      if (!peer)
+       peer = wg_peer_get (*entry);
+      else
        return WG_INPUT_ERROR_PEER;
 
       // TODO: Implement cookie_maker_consume_payload
@@ -178,17 +182,17 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
            // TODO: Add processing
          }
        noise_remote_t *rp;
-
        if (noise_consume_initiation
-           (wmp->vlib_main, &wg_if->local, &rp, message->sender_index,
-            message->unencrypted_ephemeral, message->encrypted_static,
-            message->encrypted_timestamp))
+           (vm, noise_local_get (wg_if->local_idx), &rp,
+            message->sender_index, message->unencrypted_ephemeral,
+            message->encrypted_static, message->encrypted_timestamp))
          {
-           peer = pool_elt_at_index (wmp->peers, rp->r_peer_idx);
+           peer = wg_peer_get (rp->r_peer_idx);
+         }
+       else
+         {
+           return WG_INPUT_ERROR_PEER;
          }
-
-       if (!peer)
-         return WG_INPUT_ERROR_PEER;
 
        // set_peer_address (peer, ip4_src, udp_src_port);
        if (PREDICT_FALSE (!wg_send_handshake_response (vm, peer)))
@@ -203,15 +207,18 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
        message_handshake_response_t *resp = current_b_data;
        u32 *entry =
          wg_index_table_lookup (&wmp->index_table, resp->receiver_index);
-       if (entry)
+
+       if (PREDICT_TRUE (entry != NULL))
          {
-           peer = pool_elt_at_index (wmp->peers, *entry);
-           if (!peer || peer->is_dead)
+           peer = wg_peer_get (*entry);
+           if (peer->is_dead)
              return WG_INPUT_ERROR_PEER;
          }
+       else
+         return WG_INPUT_ERROR_PEER;
 
        if (!noise_consume_response
-           (wmp->vlib_main, &peer->remote, resp->sender_index,
+           (vm, &peer->remote, resp->sender_index,
             resp->receiver_index, resp->unencrypted_ephemeral,
             resp->encrypted_nothing))
          {
@@ -223,8 +230,9 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
          }
 
        // set_peer_address (peer, ip4_src, udp_src_port);
-       if (noise_remote_begin_session (wmp->vlib_main, &peer->remote))
+       if (noise_remote_begin_session (vm, &peer->remote))
          {
+
            wg_timers_session_derived (peer);
            wg_timers_handshake_complete (peer);
            if (PREDICT_FALSE (!wg_send_keepalive (vm, peer)))
@@ -272,6 +280,7 @@ VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
   u32 *from;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
   u16 nexts[VLIB_FRAME_SIZE], *next;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -289,120 +298,132 @@ VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
       next[0] = WG_INPUT_NEXT_PUNT;
       header_type =
        ((message_header_t *) vlib_buffer_get_current (b[0]))->type;
+      u32 *peer_idx;
 
-      switch (header_type)
+      if (PREDICT_TRUE (header_type == MESSAGE_DATA))
        {
-       case MESSAGE_HANDSHAKE_INITIATION:
-       case MESSAGE_HANDSHAKE_RESPONSE:
-       case MESSAGE_HANDSHAKE_COOKIE:
-         {
-           wg_input_error_t ret = wg_handshake_process (vm, wmp, b[0]);
-           if (ret != WG_INPUT_ERROR_NONE)
-             {
-               next[0] = WG_INPUT_NEXT_ERROR;
-               b[0]->error = node->errors[ret];
-             }
-           break;
-         }
-       case MESSAGE_DATA:
-         {
-           message_data_t *data = vlib_buffer_get_current (b[0]);
-           u32 *entry =
-             wg_index_table_lookup (&wmp->index_table, data->receiver_index);
-
-           if (entry)
-             {
-               peer = pool_elt_at_index (wmp->peers, *entry);
-             }
-           else
-             {
-               next[0] = WG_INPUT_NEXT_ERROR;
-               b[0]->error = node->errors[WG_INPUT_ERROR_PEER];
-               goto out;
-             }
+         message_data_t *data = vlib_buffer_get_current (b[0]);
 
-           u16 encr_len = b[0]->current_length - sizeof (message_data_t);
-           u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
-           u8 *decr_data = clib_mem_alloc (decr_len);
+         peer_idx = wg_index_table_lookup (&wmp->index_table,
+                                           data->receiver_index);
 
-           enum noise_state_crypt state_cr =
-             noise_remote_decrypt (wmp->vlib_main,
-                                   &peer->remote,
-                                   data->receiver_index,
-                                   data->counter,
-                                   data->encrypted_data,
-                                   encr_len,
-                                   decr_data);
+         if (peer_idx)
+           {
+             peer = wg_peer_get (*peer_idx);
+           }
+         else
+           {
+             next[0] = WG_INPUT_NEXT_ERROR;
+             b[0]->error = node->errors[WG_INPUT_ERROR_PEER];
+             goto out;
+           }
 
-           switch (state_cr)
-             {
-             case SC_OK:
-               break;
-             case SC_CONN_RESET:
-               wg_timers_handshake_complete (peer);
-               break;
-             case SC_KEEP_KEY_FRESH:
-               if (PREDICT_FALSE (!wg_send_handshake (vm, peer, false)))
-                 {
-                   vlib_node_increment_counter (vm, wg_input_node.index,
-                                                WG_INPUT_ERROR_HANDSHAKE_SEND,
-                                                1);
-                 }
-               break;
-             case SC_FAILED:
-               next[0] = WG_INPUT_NEXT_ERROR;
-               b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
-               goto out;
-             default:
-               break;
-             }
+         if (PREDICT_FALSE (~0 == peer->input_thread_index))
+           {
+             /* this is the first packet to use this peer, claim the peer
+              * for this thread.
+              */
+             clib_atomic_cmp_and_swap (&peer->input_thread_index, ~0,
+                                       wg_peer_assign_thread (thread_index));
+           }
 
-           clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
-           b[0]->current_length = decr_len;
-           b[0]->flags &= ~VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+         if (PREDICT_TRUE (thread_index != peer->input_thread_index))
+           {
+             next[0] = WG_INPUT_NEXT_HANDOFF_DATA;
+             goto next;
+           }
 
-           clib_mem_free (decr_data);
+         u16 encr_len = b[0]->current_length - sizeof (message_data_t);
+         u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+         if (PREDICT_FALSE (decr_len >= WG_DEFAULT_DATA_SIZE))
+           {
+             b[0]->error = node->errors[WG_INPUT_ERROR_TOO_BIG];
+             goto out;
+           }
 
-           wg_timers_any_authenticated_packet_received (peer);
-           wg_timers_any_authenticated_packet_traversal (peer);
+         u8 *decr_data = wmp->per_thread_data[thread_index].data;
 
-           if (decr_len == 0)
-             {
-               is_keepalive = true;
-               goto out;
-             }
+         enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
+                                                                 &peer->remote,
+                                                                 data->receiver_index,
+                                                                 data->counter,
+                                                                 data->encrypted_data,
+                                                                 encr_len,
+                                                                 decr_data);
 
-           wg_timers_data_received (peer);
+         if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
+           {
+             wg_timers_handshake_complete (peer);
+           }
+         else if (PREDICT_FALSE (state_cr == SC_KEEP_KEY_FRESH))
+           {
+             wg_send_handshake_from_mt (*peer_idx, false);
+           }
+         else if (PREDICT_FALSE (state_cr == SC_FAILED))
+           {
+             next[0] = WG_INPUT_NEXT_ERROR;
+             b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+             goto out;
+           }
 
-           ip4_header_t *iph = vlib_buffer_get_current (b[0]);
+         clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
+         b[0]->current_length = decr_len;
+         b[0]->flags &= ~VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
 
-           const wg_peer_allowed_ip_t *allowed_ip;
-           bool allowed = false;
+         wg_timers_any_authenticated_packet_received (peer);
+         wg_timers_any_authenticated_packet_traversal (peer);
 
-           /*
-            * we could make this into an ACL, but the expectation
-            * is that there aren't many allowed IPs and thus a linear
-            * walk is fater than an ACL
-            */
-           vec_foreach (allowed_ip, peer->allowed_ips)
+         /* Keepalive packet has zero length */
+         if (decr_len == 0)
            {
-             if (fib_prefix_is_cover_addr_4 (&allowed_ip->prefix,
-                                             &iph->src_address))
-               {
-                 allowed = true;
-                 break;
-               }
+             is_keepalive = true;
+             goto out;
            }
-           if (allowed)
+
+         wg_timers_data_received (peer);
+
+         ip4_header_t *iph = vlib_buffer_get_current (b[0]);
+
+         const wg_peer_allowed_ip_t *allowed_ip;
+         bool allowed = false;
+
+         /*
+          * we could make this into an ACL, but the expectation
+          * is that there aren't many allowed IPs and thus a linear
+          * walk is fater than an ACL
+          */
+         vec_foreach (allowed_ip, peer->allowed_ips)
+         {
+           if (fib_prefix_is_cover_addr_4 (&allowed_ip->prefix,
+                                           &iph->src_address))
              {
-               vnet_buffer (b[0])->sw_if_index[VLIB_RX] =
-                 peer->wg_sw_if_index;
-               next[0] = WG_INPUT_NEXT_IP4_INPUT;
+               allowed = true;
+               break;
              }
-           break;
          }
-       default:
-         break;
+         if (allowed)
+           {
+             vnet_buffer (b[0])->sw_if_index[VLIB_RX] = peer->wg_sw_if_index;
+             next[0] = WG_INPUT_NEXT_IP4_INPUT;
+           }
+       }
+      else
+       {
+         peer_idx = NULL;
+
+         /* Handshake packets should be processed in main thread */
+         if (thread_index != 0)
+           {
+             next[0] = WG_INPUT_NEXT_HANDOFF_HANDSHAKE;
+             goto next;
+           }
+
+         wg_input_error_t ret = wg_handshake_process (vm, wmp, b[0]);
+         if (ret != WG_INPUT_ERROR_NONE)
+           {
+             next[0] = WG_INPUT_NEXT_ERROR;
+             b[0]->error = node->errors[ret];
+           }
        }
 
     out:
@@ -413,7 +434,9 @@ VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
          t->type = header_type;
          t->current_length = b[0]->current_length;
          t->is_keepalive = is_keepalive;
+         t->peer = peer_idx ? *peer_idx : INDEX_INVALID;
        }
+    next:
       n_left_from -= 1;
       next += 1;
       b += 1;
@@ -435,6 +458,8 @@ VLIB_REGISTER_NODE (wg_input_node) =
   .n_next_nodes = WG_INPUT_N_NEXT,
   /* edit / add dispositions here */
   .next_nodes = {
+        [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg-handshake-handoff",
+        [WG_INPUT_NEXT_HANDOFF_DATA] = "wg-input-data-handoff",
         [WG_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
         [WG_INPUT_NEXT_PUNT] = "error-punt",
         [WG_INPUT_NEXT_ERROR] = "error-drop",