2 *------------------------------------------------------------------
3 * Copyright (c) 2018 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
19 #include <vlib/vlib.h>
20 #include <vlib/unix/unix.h>
21 #include <vlib/pci/pci.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/devices/devices.h>
24 #include <vnet/interface/rx_queue_funcs.h>
/* X-macro list of per-node error counters: expanded once below to build the
 * af_xdp_input_error_t enum and once to build the matching strings table.
 * (Some lines of the enum/array definitions are elided in this excerpt.) */
27 #define foreach_af_xdp_input_error \
28 _ (SYSCALL_REQUIRED, "syscall required") \
29 _ (SYSCALL_FAILURES, "syscall failures")
/* expand the list into AF_XDP_INPUT_ERROR_* enum members */
33 #define _(f,s) AF_XDP_INPUT_ERROR_##f,
34 foreach_af_xdp_input_error
37 } af_xdp_input_error_t;
/* parallel table of human-readable counter names, indexed by the enum */
39 static __clib_unused char *af_xdp_input_error_strings[] = {
41 foreach_af_xdp_input_error
45 static_always_inline void
/* Record packet traces for up to the node's remaining trace budget among the
 * n_left buffers in bi[].  Early-exits when tracing is disabled.
 * NOTE(review): several lines (hw_if_index parameter, loop advance, braces)
 * are elided in this excerpt; comments describe only what is visible. */
46 af_xdp_device_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
47 u32 n_left, const u32 * bi, u32 next_index,
/* remaining number of packets this node may still trace */
50 u32 n_trace = vlib_get_trace_count (vm, node);
/* fast path: tracing is off for this node */
52 if (PREDICT_TRUE (0 == n_trace))
55 while (n_trace && n_left)
57 vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
59 (vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 0)))
61 af_xdp_input_trace_t *tr =
62 vlib_add_trace (vm, node, b, sizeof (*tr));
63 tr->next_index = next_index;
64 tr->hw_if_index = hw_if_index;
/* write back the unconsumed trace budget */
71 vlib_set_trace_count (vm, node, n_trace);
74 static_always_inline void
/* "Doorbell" for the fill queue: publish n_alloc freshly written fill-ring
 * entries to the kernel and, when the kernel requested a wakeup, kick it with
 * a non-blocking poll() on the xsk fd.  Syscall failures are counted and
 * reported as a device error.  (Some lines are elided in this excerpt.) */
75 af_xdp_device_input_refill_db (vlib_main_t * vm,
76 const vlib_node_runtime_t * node,
77 af_xdp_device_t * ad, af_xdp_rxq_t * rxq,
/* make the n_alloc reserved fill entries visible to the kernel */
80 xsk_ring_prod__submit (&rxq->fq, n_alloc);
/* no syscall needed: interrupt-mode queues are serviced elsewhere, and the
 * kernel only wants a wakeup when the needs_wakeup flag is set */
82 if (AF_XDP_RXQ_MODE_INTERRUPT == rxq->mode ||
83 !xsk_ring_prod__needs_wakeup (&rxq->fq))
/* count the (expected, non-fatal) wakeup syscall */
87 vlib_error_count (vm, node->node_index,
88 AF_XDP_INPUT_ERROR_SYSCALL_REQUIRED, 1);
/* serialize the syscall if a lock was initialized for this queue;
 * trylock: skip the kick rather than block the worker */
90 if (clib_spinlock_trylock_if_init (&rxq->syscall_lock))
92 struct pollfd fd = { .fd = rxq->xsk_fd, .events = POLLIN | POLLOUT };
/* timeout 0: wake the kernel without ever blocking */
93 int ret = poll (&fd, 1, 0);
94 clib_spinlock_unlock_if_init (&rxq->syscall_lock);
95 if (PREDICT_FALSE (ret < 0))
97 /* something bad is happening */
99 vlib_error_count (vm, node->node_index,
100 AF_XDP_INPUT_ERROR_SYSCALL_FAILURES, 1);
101 af_xdp_device_error (ad, "rx poll() failed");
106 static_always_inline void
/* Refill the AF_XDP fill queue (fq) with freshly allocated vlib buffers so
 * the kernel has umem space to receive into, then publish the entries via
 * af_xdp_device_input_refill_db().  `copy` selects the copy-mode address
 * translation (extra XDP_PACKET_HEADROOM offset).
 * NOTE(review): loop headers, branch/brace lines and the bail-out paths are
 * elided in this excerpt; comments cover only the visible statements. */
107 af_xdp_device_input_refill_inline (vlib_main_t *vm,
108 const vlib_node_runtime_t *node,
109 af_xdp_device_t *ad, af_xdp_rxq_t *rxq,
113 const u32 size = rxq->fq.size;
/* ring size is a power of two, so size - 1 is a valid index mask */
114 const u32 mask = size - 1;
115 u32 bis[VLIB_FRAME_SIZE], *bi = bis;
116 u32 n_alloc, n, n_wrap;
119 ASSERT (mask == rxq->fq.mask);
121 /* do not enqueue more packet than ring space */
122 n_alloc = xsk_prod_nb_free (&rxq->fq, 16);
123 /* do not bother to allocate if too small */
127 n_alloc = clib_min (n_alloc, ARRAY_LEN (bis));
/* n_alloc may shrink further if the buffer pool is running low */
128 n_alloc = vlib_buffer_alloc_from_pool (vm, bis, n_alloc, ad->pool);
129 n = xsk_ring_prod__reserve (&rxq->fq, n_alloc, &idx);
/* reservation cannot fail: we asked for at most the reported free space */
130 ASSERT (n == n_alloc);
132 fill = xsk_ring_prod__fill_addr (&rxq->fq, idx);
/* split the work at the ring wrap point: n entries now, n_wrap after */
133 n = clib_min (n_alloc, size - (idx & mask));
134 n_wrap = n_alloc - n;
137 * Note about headroom: for some reason, there seems to be a discrepancy
138 * between 0-copy and copy mode. See
139 * src/plugins/af_xdp/device.c:af_xdp_create_queue()
/* translate a vlib buffer index into a umem address; copy mode must also
 * skip XDP_PACKET_HEADROOM (see note above) */
141 #define bi2addr(bi) \
142 (((bi) << CLIB_LOG2_CACHE_LINE_BYTES) + (copy ? XDP_PACKET_HEADROOM : 0))
148 #ifdef CLIB_HAVE_VEC256
/* AVX2 path: widen 8 u32 buffer indices to u64 addresses, 4 at a time */
149 u64x4 b0 = u64x4_from_u32x4 (*(u32x4u *) (bi + 0));
150 u64x4 b1 = u64x4_from_u32x4 (*(u32x4u *) (bi + 4));
151 *(u64x4u *) (fill + 0) = bi2addr (b0);
152 *(u64x4u *) (fill + 4) = bi2addr (b1);
/* scalar path: 8 entries per unrolled iteration */
154 fill[0] = bi2addr (bi[0]);
155 fill[1] = bi2addr (bi[1]);
156 fill[2] = bi2addr (bi[2]);
157 fill[3] = bi2addr (bi[3]);
158 fill[4] = bi2addr (bi[4]);
159 fill[5] = bi2addr (bi[5]);
160 fill[6] = bi2addr (bi[6]);
161 fill[7] = bi2addr (bi[7]);
/* leftover entries, one at a time */
170 fill[0] = bi2addr (bi[0]);
/* wrapped portion restarts at the beginning of the ring */
178 fill = xsk_ring_prod__fill_addr (&rxq->fq, 0);
/* publish the new fill entries and wake the kernel if required */
184 af_xdp_device_input_refill_db (vm, node, ad, rxq, n_alloc);
187 static_always_inline void
/* When the next node is ethernet-input, mark the pending frame as carrying
 * packets from a single interface (ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) and
 * stamp the interface indices into the frame's scalar arguments so
 * ethernet-input can take its fast path.  No-op for any other next node.
 * (Some lines are elided in this excerpt.) */
188 af_xdp_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
189 const u32 next_index, const u32 sw_if_index,
190 const u32 hw_if_index)
192 vlib_next_frame_t *nf;
194 ethernet_input_frame_t *ef;
/* only ethernet-input understands this frame layout; bail out otherwise */
196 if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
200 vlib_node_runtime_get_next_frame (vm, node,
201 VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
202 f = vlib_get_frame (vm, nf->frame);
203 f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
/* frame-wide interface identity for the single-interface fast path */
205 ef = vlib_frame_scalar_args (f);
206 ef->sw_if_index = sw_if_index;
207 ef->hw_if_index = hw_if_index;
210 static_always_inline u32
/* Convert n_rx received AF_XDP rx descriptors (starting at ring index idx)
 * into vlib buffer indices in bis[], initialize each buffer from the
 * template bt and set its current_length, then release the consumed
 * descriptors back to the rx ring.  Accumulates the total byte count in
 * `bytes` (presumably the return value — the return line is elided).
 * NOTE(review): loop headers and length-gathering lines are elided in this
 * excerpt; comments cover only the visible statements. */
211 af_xdp_device_input_bufs (vlib_main_t * vm, const af_xdp_device_t * ad,
212 af_xdp_rxq_t * rxq, u32 * bis, const u32 n_rx,
213 vlib_buffer_t * bt, u32 idx, const int copy)
215 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
216 u16 lens[VLIB_FRAME_SIZE], *len = lens;
217 const u32 mask = rxq->rx.mask;
218 u32 n = n_rx, *bi = bis, bytes = 0;
/* inverse of bi2addr in the refill path: umem address -> buffer index,
 * undoing the copy-mode XDP_PACKET_HEADROOM offset */
220 #define addr2bi(addr) \
221 (((addr) - (copy ? XDP_PACKET_HEADROOM : 0)) >> CLIB_LOG2_CACHE_LINE_BYTES)
225 const struct xdp_desc *desc = xsk_ring_cons__rx_desc (&rxq->rx, idx);
226 bi[0] = addr2bi (xsk_umem__extract_addr (desc->addr));
/* every recovered index must refer to a buffer we allocated earlier */
227 ASSERT (vlib_buffer_is_known (vm, bi[0]) ==
228 VLIB_BUFFER_KNOWN_ALLOCATED);
230 idx = (idx + 1) & mask;
/* resolve all buffer indices to buffer headers in one pass */
236 vlib_get_buffers (vm, bis, bufs, n_rx);
/* unrolled x4: prefetch 4 ahead, stamp template, set length, count bytes */
243 vlib_prefetch_buffer_header (b[4], LOAD);
244 vlib_buffer_copy_template (b[0], bt);
245 bytes += b[0]->current_length = len[0];
247 vlib_prefetch_buffer_header (b[5], LOAD);
248 vlib_buffer_copy_template (b[1], bt);
249 bytes += b[1]->current_length = len[1];
251 vlib_prefetch_buffer_header (b[6], LOAD);
252 vlib_buffer_copy_template (b[2], bt);
253 bytes += b[2]->current_length = len[2];
255 vlib_prefetch_buffer_header (b[7], LOAD);
256 vlib_buffer_copy_template (b[3], bt);
257 bytes += b[3]->current_length = len[3];
/* leftover buffers, one at a time */
266 vlib_buffer_copy_template (b[0], bt);
267 bytes += b[0]->current_length = len[0];
/* hand the consumed rx descriptors back to the kernel */
273 xsk_ring_cons__release (&rxq->rx, n_rx);
277 static_always_inline uword
/* Per-queue RX path: peek packets from the AF_XDP rx ring, turn them into a
 * vlib frame, hand the frame to the next node (ethernet-input fast path when
 * applicable), trace, update RX counters, and refill the fill queue.
 * `copy` is a compile-time flag distinguishing copy vs zero-copy mode.
 * NOTE(review): some declarations, returns and closing lines are elided in
 * this excerpt; comments cover only the visible statements. */
278 af_xdp_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
279 vlib_frame_t * frame, af_xdp_device_t * ad,
280 u16 qid, const int copy)
282 vnet_main_t *vnm = vnet_get_main ();
283 af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid);
285 u32 next_index, *to_next, n_left_to_next;
286 u32 n_rx_packets, n_rx_bytes;
/* non-destructive peek: descriptors are released in _input_bufs() */
289 n_rx_packets = xsk_ring_cons__peek (&rxq->rx, VLIB_FRAME_SIZE, &idx);
291 if (PREDICT_FALSE (0 == n_rx_packets))
/* per-device buffer template: metadata stamped onto every rx buffer */
294 vlib_buffer_copy_template (&bt, ad->buffer_template);
295 next_index = ad->per_interface_next_index;
/* device-input features may redirect next_index and adjust the template */
296 if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
297 vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, &bt);
299 vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
302 af_xdp_device_input_bufs (vm, ad, rxq, to_next, n_rx_packets, &bt, idx,
304 af_xdp_device_input_ethernet (vm, node, next_index, ad->sw_if_index,
307 vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);
309 af_xdp_device_input_trace (vm, node, n_rx_packets, to_next, next_index,
/* combined (packets + bytes) RX counter for this hw interface */
312 vlib_increment_combined_counter
313 (vnm->interface_main.combined_sw_if_counters +
314 VNET_INTERFACE_COUNTER_RX, vm->thread_index,
315 ad->hw_if_index, n_rx_packets, n_rx_bytes);
/* give the kernel fresh buffers to receive into */
318 af_xdp_device_input_refill_inline (vm, node, ad, rxq, copy);
/* Input-node dispatch function: iterate the runtime's rx-queue poll vector,
 * skip admin-down devices, and run the per-queue RX path with the zero-copy
 * flag resolved at compile time (two specialized calls).  Returns the total
 * packet count (elided in this excerpt, along with loop/brace lines). */
323 VLIB_NODE_FN (af_xdp_input_node) (vlib_main_t * vm,
324 vlib_node_runtime_t * node,
325 vlib_frame_t * frame)
328 af_xdp_main_t *am = &af_xdp_main;
/* queues this node instance must poll on this thread */
329 vnet_hw_if_rxq_poll_vector_t *p,
330 *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
334 af_xdp_device_t *ad = vec_elt_at_index (am->devices, p->dev_instance);
/* skip devices that are not admin-up */
335 if ((ad->flags & AF_XDP_DEVICE_F_ADMIN_UP) == 0)
/* branch once per device so `copy` is a compile-time constant below */
337 if (PREDICT_TRUE (ad->flags & AF_XDP_DEVICE_F_ZEROCOPY))
338 n_rx += af_xdp_device_input_inline (vm, node, frame, ad, p->queue_id,
341 n_rx += af_xdp_device_input_inline (vm, node, frame, ad, p->queue_id,
348 #ifndef CLIB_MARCH_VARIANT
/* External (single-definition) entry point: refill every rx queue of the
 * device.  Called outside node-dispatch context, hence node = 0; the copy
 * flag is derived from the absence of the device's zero-copy flag. */
350 af_xdp_device_input_refill (af_xdp_device_t *ad)
352 vlib_main_t *vm = vlib_get_main ();
354 vec_foreach (rxq, ad->rxqs)
355 af_xdp_device_input_refill_inline (
356 vm, 0, ad, rxq, 0 == (ad->flags & AF_XDP_DEVICE_F_ZEROCOPY));
358 #endif /* CLIB_MARCH_VARIANT */
/* Node registration: starts DISABLED; it is enabled per rx queue when an
 * af_xdp interface is brought up.  Error strings map to the counters
 * declared at the top of this file. */
361 VLIB_REGISTER_NODE (af_xdp_input_node) = {
362 .name = "af_xdp-input",
363 .sibling_of = "device-input",
364 .format_trace = format_af_xdp_input_trace,
365 .type = VLIB_NODE_TYPE_INPUT,
366 .state = VLIB_NODE_STATE_DISABLED,
367 .n_errors = AF_XDP_INPUT_N_ERROR,
368 .error_strings = af_xdp_input_error_strings,
369 .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
374 * fd.io coding-style-patch-verification: ON
377 * eval: (c-set-style "gnu")