From d4e109138279fcfbfce9d82384f0fa53b8f43ae1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Beno=C3=AEt=20Ganne?= Date: Tue, 15 Sep 2020 15:25:43 +0200 Subject: [PATCH] af_xdp: add option to claim all available rx queues MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Type: feature Change-Id: I97176c2c90ea664a68078b3a7b7d44eb237a7f13 Signed-off-by: Benoît Ganne --- src/plugins/af_xdp/af_xdp.api | 4 ++-- src/plugins/af_xdp/af_xdp.h | 2 ++ src/plugins/af_xdp/af_xdp_doc.md | 36 +++++++++++++++--------------- src/plugins/af_xdp/cli.c | 2 +- src/plugins/af_xdp/device.c | 47 +++++++++++++++++++++++++--------------- src/plugins/af_xdp/unformat.c | 2 ++ 6 files changed, 55 insertions(+), 38 deletions(-) diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api index 765af6820e3..14f51d87d6a 100644 --- a/src/plugins/af_xdp/af_xdp.api +++ b/src/plugins/af_xdp/af_xdp.api @@ -31,7 +31,7 @@ enum af_xdp_mode @param context - sender context, to match reply w/ request @param host_if - Linux netdev interface name @param name - new af_xdp interface name (optional) - @param rxq_num - number of receive queues (optional) + @param rxq_num - number of receive queues. 
65535 can be used as a special value to request all available queues (optional) @param rxq_size - receive queue size (optional) @param txq_size - transmit queue size (optional) @param mode - operation mode (optional) @@ -50,7 +50,7 @@ define af_xdp_create u16 txq_size [default=0]; vl_api_af_xdp_mode_t mode [default=0]; string prog[256]; - option vat_help = " [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues num] [prog pathname] [zero-copy|no-zero-copy]"; + option vat_help = " [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy]"; option status="in_progress"; }; diff --git a/src/plugins/af_xdp/af_xdp.h b/src/plugins/af_xdp/af_xdp.h index fd990ec3f90..3bd53ad768d 100644 --- a/src/plugins/af_xdp/af_xdp.h +++ b/src/plugins/af_xdp/af_xdp.h @@ -22,6 +22,8 @@ #include #include +#define AF_XDP_NUM_RX_QUEUES_ALL ((u16)-1) + #define af_xdp_log(lvl, dev, f, ...) \ vlib_log(lvl, af_xdp_main.log_class, "%v: " f, (dev)->name, ##__VA_ARGS__) diff --git a/src/plugins/af_xdp/af_xdp_doc.md b/src/plugins/af_xdp/af_xdp_doc.md index 40a3596be7c..76d653fd251 100644 --- a/src/plugins/af_xdp/af_xdp_doc.md +++ b/src/plugins/af_xdp/af_xdp_doc.md @@ -17,8 +17,13 @@ Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE (4096-bytes on most systems) minus 256-bytes, and they are additional limitations depending upon specific Linux device drivers. As a rule of thumb, a MTU of 3000-bytes or less should be safe. -Furthermore, upon UMEM creation, the kernel allocates a physically-contiguous structure, whose size is proportional to the number of 4KB pages contained in the UMEM. That allocation might fail when the number of buffers allocated by VPP is too high. That number can be controlled with the `buffers { buffers-per-numa }` configuration option. -Finally, note that because of this limitation, this plugin is unlikely to be compatible with the use of 1GB hugepages. 
+Furthermore, upon UMEM creation, the kernel allocates a +physically-contiguous structure, whose size is proportional to the number +of 4KB pages contained in the UMEM. That allocation might fail when +the number of buffers allocated by VPP is too high. That number can be +controlled with the `buffers { buffers-per-numa }` configuration option. +Finally, note that because of this limitation, this plugin is unlikely +to be compatible with the use of 1GB hugepages. ## Requirements The Linux kernel interface must be up and have enough queues before @@ -31,9 +36,10 @@ AF_XDP interface, and only them. Depending on your configuration, there will usually be several RX queues (typically 1 per core) and packets are spread accross queues by RSS. In order to receive consistent traffic, you **must** program the NIC dispatching accordingly. The simplest way -to get all the packets is to reconfigure the Linux kernel driver to use -only `num_rx_queues` RX queues (ie all NIC queues will be associated -with the AF_XDP socket): +to get all the packets is to specify `num-rx-queues all` to grab all +available queues or to reconfigure the Linux kernel driver to use only +`num_rx_queues` RX queues (ie all NIC queues will be associated with +the AF_XDP socket): ``` ~# ethtool -L <iface> combined <num_rx_queues> ``` @@ -57,25 +63,21 @@ kernel interface in promiscuous mode: ## Security considerations When creating an AF_XDP interface, it will receive all packets arriving -to the NIC RX queue #0. You need to configure the Linux kernel NIC -driver properly to ensure that only intented packets will arrive in -this queue. There is no way to filter the packets after-the-fact using -eg. netfilter or eBPF. +to the NIC RX queues `[0, num_rx_queues[`. You need to configure the Linux +kernel NIC driver properly to ensure that only intended packets will +arrive in these queues. There is no way to filter the packets after-the-fact +using eg. netfilter or eBPF. ## Quickstart -1. 
Setup the Linux kernel interface (enp216s0f0 here) to use 4 queues: -``` -~# ethtool -L enp216s0f0 combined 4 -``` -2. Put the Linux kernel interface up and in promiscuous mode: +1. Put the Linux kernel interface up and in promiscuous mode: ``` ~# ip l set dev enp216s0f0 promisc on up ``` -3. Create the AF_XDP interface: +2. Create the AF_XDP interface: ``` -~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 +~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all ``` -4. Use the interface as usual, eg.: +3. Use the interface as usual, eg.: ``` ~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24 ~# vppctl set int st enp216s0f0/0 up diff --git a/src/plugins/af_xdp/cli.c b/src/plugins/af_xdp/cli.c index 5fe7c2ef399..d5f21d4c391 100644 --- a/src/plugins/af_xdp/cli.c +++ b/src/plugins/af_xdp/cli.c @@ -47,7 +47,7 @@ af_xdp_create_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (af_xdp_create_command, static) = { .path = "create interface af_xdp", - .short_help = "create interface af_xdp [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues num] [prog pathname] [zero-copy|no-zero-copy]", + .short_help = "create interface af_xdp [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy]", .function = af_xdp_create_command_fn, }; /* *INDENT-ON* */ diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c index c750e8dd8bd..5090d3a649a 100644 --- a/src/plugins/af_xdp/device.c +++ b/src/plugins/af_xdp/device.c @@ -172,14 +172,27 @@ static int af_xdp_create_queue (vlib_main_t * vm, af_xdp_create_if_args_t * args, af_xdp_device_t * ad, int qid, int rxq_num, int txq_num) { - struct xsk_umem **umem = vec_elt_at_index (ad->umem, qid); - struct xsk_socket **xsk = vec_elt_at_index (ad->xsk, qid); - af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid); - af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, qid); + struct xsk_umem **umem; + 
struct xsk_socket **xsk; + af_xdp_rxq_t *rxq; + af_xdp_txq_t *txq; struct xsk_umem_config umem_config; struct xsk_socket_config sock_config; struct xdp_options opt; socklen_t optlen; + + vec_validate_aligned (ad->umem, qid, CLIB_CACHE_LINE_BYTES); + umem = vec_elt_at_index (ad->umem, qid); + + vec_validate_aligned (ad->xsk, qid, CLIB_CACHE_LINE_BYTES); + xsk = vec_elt_at_index (ad->xsk, qid); + + vec_validate_aligned (ad->rxqs, qid, CLIB_CACHE_LINE_BYTES); + rxq = vec_elt_at_index (ad->rxqs, qid); + + vec_validate_aligned (ad->txqs, qid, CLIB_CACHE_LINE_BYTES); + txq = vec_elt_at_index (ad->txqs, qid); + /* * fq and cq must always be allocated even if unused * whereas rx and tx indicates whether we want rxq, txq, or both @@ -333,10 +346,6 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) goto err1; q_num = clib_max (rxq_num, txq_num); - vec_validate_aligned (ad->rxqs, q_num - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (ad->txqs, q_num - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (ad->umem, q_num - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (ad->xsk, q_num - 1, CLIB_CACHE_LINE_BYTES); ad->txq_num = txq_num; for (i = 0; i < q_num; i++) { @@ -345,10 +354,10 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) /* * queue creation failed * it is only a fatal error if we could not create the number of rx - * queues requested explicitely by the user + * queues requested explicitly by the user and the user did not + * request 'all' * we might create less tx queues than workers but this is ok */ - af_xdp_txq_t *txq; /* fixup vectors length */ vec_set_len (ad->umem, i); @@ -356,15 +365,17 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) vec_set_len (ad->rxqs, i); vec_set_len (ad->txqs, i); - if (i < rxq_num) + if (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != rxq_num) goto err1; /* failed creating requested rxq: fatal error, bailing out */ - /* - * we created all rxq but failed some txq: not an 
error but - * initialize lock for shared txq - */ - ad->txq_num = i; - vec_foreach (txq, ad->txqs) clib_spinlock_init (&txq->lock); + if (i < txq_num) + { + /* we created fewer txq than threads: not an error, but initialize locks for shared txq */ + af_xdp_txq_t *txq; + ad->txq_num = i; + vec_foreach (txq, ad->txqs) clib_spinlock_init (&txq->lock); + } + args->rv = 0; clib_error_free (args->error); break; @@ -404,7 +415,7 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args) vnet_hw_interface_set_input_node (vnm, ad->hw_if_index, af_xdp_input_node.index); - for (i = 0; i < rxq_num; i++) + for (i = 0; i < vec_len (ad->rxqs); i++) { af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i); clib_file_t f = { diff --git a/src/plugins/af_xdp/unformat.c b/src/plugins/af_xdp/unformat.c index 154d459900e..b2292464112 100644 --- a/src/plugins/af_xdp/unformat.c +++ b/src/plugins/af_xdp/unformat.c @@ -40,6 +40,8 @@ unformat_af_xdp_create_if_args (unformat_input_t * input, va_list * vargs) ; else if (unformat (line_input, "tx-queue-size %u", &args->txq_size)) ; + else if (unformat (line_input, "num-rx-queues all")) + args->rxq_num = AF_XDP_NUM_RX_QUEUES_ALL; else if (unformat (line_input, "num-rx-queues %u", &args->rxq_num)) ; else if (unformat (line_input, "prog %s", &args->prog)) -- 2.16.6