2 #include <vppinfra/clib.h>
4 #include <vlib/unix/unix.h>
5 #include <vnet/ethernet/ethernet.h>
6 #include <vnet/devices/devices.h>
7 #include <af_xdp/af_xdp.h>
9 #define AF_XDP_TX_RETRIES 5
/* Reclaim buffers for packets the kernel has finished transmitting:
 * drain the AF_XDP completion ring (cq), convert each completed 64-bit
 * umem address back into a 32-bit vlib buffer index, and return the
 * buffers to the vlib allocator.
 * NOTE(review): some lines of this function are not visible in this
 * excerpt (e.g. the txq parameter, idx/n_free declarations and the
 * per-chunk loop) -- comments describe only what is shown. */
11 static_always_inline void
12 af_xdp_device_output_free (vlib_main_t * vm, const vlib_node_runtime_t * node,
/* completion ring size is a power of two; mask is used for index wrap */
16 const u32 size = txq->cq.size;
17 const u32 mask = size - 1;
18 u32 bis[VLIB_FRAME_SIZE], *bi = bis;
/* ask the kernel how many completions are ready (at most one frame) */
20 u32 n = xsk_ring_cons__peek (&txq->cq, ARRAY_LEN (bis), &idx);
23 /* we rely on casting addr (u64) -> bi (u32) to discard the XSK offset below */
24 STATIC_ASSERT (BITS (bi[0]) + CLIB_LOG2_CACHE_LINE_BYTES <=
25 XSK_UNALIGNED_BUF_OFFSET_SHIFT, "wrong size");
26 ASSERT (mask == txq->cq.mask);
/* work on the contiguous chunk up to the end of the ring; the wrap-around
 * continuation is handled further below */
31 compl = xsk_ring_cons__comp_addr (&txq->cq, idx);
32 n = clib_min (n_free, size - (idx & mask));
39 #ifdef CLIB_HAVE_VEC256
/* vector path: shift 8 u64 completion addresses right by the cache-line
 * log2 to recover buffer indices, then pack the low u32 of each u64 into
 * 8 contiguous u32 buffer indices */
40 u64x4 b0 = (*(u64x4u *) (compl + 0)) >> CLIB_LOG2_CACHE_LINE_BYTES;
41 u64x4 b1 = (*(u64x4u *) (compl + 4)) >> CLIB_LOG2_CACHE_LINE_BYTES;
42 /* permute 256-bit register so lower u32s of each buffer index are
43 * placed into lower 128-bits */
44 const u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 };
45 u32x8 b2 = u32x8_permute ((u32x8) b0, mask);
46 u32x8 b3 = u32x8_permute ((u32x8) b1, mask);
47 /* extract lower 128-bits and save them to the array of buffer indices */
48 *(u32x4u *) (bi + 0) = u32x8_extract_lo (b2);
49 *(u32x4u *) (bi + 4) = u32x8_extract_lo (b3);
/* scalar path: recover 8 buffer indices one shift at a time */
51 bi[0] = compl[0] >> CLIB_LOG2_CACHE_LINE_BYTES;
52 bi[1] = compl[1] >> CLIB_LOG2_CACHE_LINE_BYTES;
53 bi[2] = compl[2] >> CLIB_LOG2_CACHE_LINE_BYTES;
54 bi[3] = compl[3] >> CLIB_LOG2_CACHE_LINE_BYTES;
55 bi[4] = compl[4] >> CLIB_LOG2_CACHE_LINE_BYTES;
56 bi[5] = compl[5] >> CLIB_LOG2_CACHE_LINE_BYTES;
57 bi[6] = compl[6] >> CLIB_LOG2_CACHE_LINE_BYTES;
58 bi[7] = compl[7] >> CLIB_LOG2_CACHE_LINE_BYTES;
/* single-entry tail: also sanity-check the recovered index in debug builds */
67 bi[0] = compl[0] >> CLIB_LOG2_CACHE_LINE_BYTES;
68 ASSERT (vlib_buffer_is_known (vm, bi[0]) ==
69 VLIB_BUFFER_KNOWN_ALLOCATED);
/* ring wrapped: continue consuming from slot 0 */
77 compl = xsk_ring_cons__comp_addr (&txq->cq, 0);
/* hand the consumed slots back to the kernel and free all buffers at once */
83 xsk_ring_cons__release (&txq->cq, n_free);
84 vlib_buffer_free (vm, bis, n_free);
/* TX "doorbell": publish n_tx reserved descriptors to the kernel and,
 * when the socket requires it, wake the kernel with a sendmsg() syscall
 * so it starts transmitting.
 * NOTE(review): some lines (ad parameter, retry-loop header, return
 * paths) are not visible in this excerpt. */
87 static_always_inline void
88 af_xdp_device_output_tx_db (vlib_main_t * vm,
89 const vlib_node_runtime_t * node,
91 af_xdp_txq_t * txq, const u32 n_tx)
/* make the filled descriptors visible to the kernel */
93 xsk_ring_prod__submit (&txq->tx, n_tx);
/* fast path: the driver polls the ring, no wakeup syscall needed */
95 if (!xsk_ring_prod__needs_wakeup (&txq->tx))
/* count the (comparatively expensive) wakeup we are about to perform */
98 vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SYSCALL_REQUIRED, 1);
/* serialize wakeups when the queue may be kicked from several threads */
100 clib_spinlock_lock_if_init (&txq->syscall_lock);
/* re-check under the lock: someone else may have woken the kernel already */
102 if (xsk_ring_prod__needs_wakeup (&txq->tx))
/* empty message: sendmsg() is used purely as a kick, no data is carried */
104 const struct msghdr msg = {};
106 /* On tx, xsk socket will only tx up to TX_BATCH_SIZE, as defined in
107 * kernel net/xdp/xsk.c. Unfortunately we do not know how much this is,
108 * our only option is to retry until everything is sent... */
111 ret = sendmsg (txq->xsk_fd, &msg, MSG_DONTWAIT);
/* keep kicking while the kernel reports EAGAIN (batch limit reached) */
113 while (ret < 0 && EAGAIN == errno);
114 if (PREDICT_FALSE (ret < 0))
116 /* not EAGAIN: something bad is happening */
117 vlib_error_count (vm, node->node_index,
118 AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1);
119 af_xdp_device_error (ad, "tx poll() failed");
123 clib_spinlock_unlock_if_init (&txq->syscall_lock);
/* Try to enqueue up to n_tx buffers on the AF_XDP TX ring: reserve
 * descriptor slots, then fill each descriptor with the buffer's umem
 * address (offset by the vlib_buffer_t header + current_data, encoded in
 * the XSK unaligned-chunk format) and its length. Returns the number of
 * packets actually enqueued (per the u32 return type; the return
 * statements are outside this excerpt).
 * NOTE(review): the n_tx/bi parameters, idx/n/offset/addr declarations
 * and the loop headers are not visible here -- comments describe only
 * the visible lines. */
126 static_always_inline u32
127 af_xdp_device_output_tx_try (vlib_main_t * vm,
128 const vlib_node_runtime_t * node,
129 af_xdp_device_t * ad, af_xdp_txq_t * txq,
/* translate buffer indices into buffer pointers; 'start' is the base of
 * buffer memory so that (pointer - start) yields the umem address */
132 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
133 const uword start = vm->buffer_main->buffer_mem_start;
134 const u32 size = txq->tx.size;
135 const u32 mask = size - 1;
136 struct xdp_desc *desc;
/* tx and cq rings are expected to be sized identically */
140 ASSERT (mask == txq->cq.mask);
142 n_tx = xsk_ring_prod__reserve (&txq->tx, n_tx, &idx);
144 /* if ring is full, do nothing */
145 if (PREDICT_FALSE (0 == n_tx))
148 vlib_get_buffers (vm, bi, bufs, n_tx);
/* fill the contiguous chunk up to the ring end first */
150 desc = xsk_ring_prod__tx_desc (&txq->tx, idx);
151 n = clib_min (n_tx, size - (idx & mask));
/* fast path handles 4 buffers at a time; any chained buffer forces the
 * slow (linearizing) path below */
158 if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT ||
159 b[1]->flags & VLIB_BUFFER_NEXT_PRESENT ||
160 b[2]->flags & VLIB_BUFFER_NEXT_PRESENT ||
161 b[3]->flags & VLIB_BUFFER_NEXT_PRESENT))
/* prefetch the next quad while filling the current descriptors;
 * desc.addr = XSK offset (header + current_data) | umem base address */
166 vlib_prefetch_buffer_header (b[4], LOAD);
168 (sizeof (vlib_buffer_t) +
169 b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
170 addr = pointer_to_uword (b[0]) - start;
171 desc[0].addr = offset | addr;
172 desc[0].len = b[0]->current_length;
174 vlib_prefetch_buffer_header (b[5], LOAD);
176 (sizeof (vlib_buffer_t) +
177 b[1]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
178 addr = pointer_to_uword (b[1]) - start;
179 desc[1].addr = offset | addr;
180 desc[1].len = b[1]->current_length;
182 vlib_prefetch_buffer_header (b[6], LOAD);
184 (sizeof (vlib_buffer_t) +
185 b[2]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
186 addr = pointer_to_uword (b[2]) - start;
187 desc[2].addr = offset | addr;
188 desc[2].len = b[2]->current_length;
190 vlib_prefetch_buffer_header (b[7], LOAD);
192 (sizeof (vlib_buffer_t) +
193 b[3]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
194 addr = pointer_to_uword (b[3]) - start;
195 desc[3].addr = offset | addr;
196 desc[3].len = b[3]->current_length;
/* slow path, one buffer at a time: chained buffers must be collapsed to
 * a single segment since one xdp_desc describes one contiguous area */
205 if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
207 if (vlib_buffer_chain_linearize (vm, b[0]) != 1)
/* cannot linearize: drop the packet rather than send it truncated */
209 af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
210 "vlib_buffer_chain_linearize failed");
211 vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b[0]));
217 (sizeof (vlib_buffer_t) +
218 b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
219 addr = pointer_to_uword (b[0]) - start;
220 desc[0].addr = offset | addr;
221 desc[0].len = b[0]->current_length;
/* ring wrapped: continue filling descriptors from slot 0 */
229 desc = xsk_ring_prod__tx_desc (&txq->tx, 0);
/* Per-interface TX node function: looks up the device and TX queue for
 * this frame, then repeatedly (up to AF_XDP_TX_RETRIES) frees completed
 * buffers and enqueues pending ones, rings the TX doorbell, and drops
 * whatever could not be enqueued.
 * NOTE(review): several lines (locals i/n/from/n_tx, the shared_queue
 * branch taking txq->lock, return statement) are outside this excerpt. */
238 VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
239 vlib_node_runtime_t * node,
240 vlib_frame_t * frame)
242 af_xdp_main_t *rm = &af_xdp_main;
/* the frame's scalar args identify which device queue this frame targets */
243 vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
244 af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance);
245 const vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
246 const int shared_queue = tf->shared_queue;
247 af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, tf->queue_id);
252 from = vlib_frame_vector_args (frame);
253 n_tx = frame->n_vectors;
/* a queue shared between workers must be serialized */
256 clib_spinlock_lock (&txq->lock);
/* retry: the TX ring may momentarily lack space until completions drain,
 * so interleave freeing completed slots with enqueue attempts */
258 for (i = 0, n = 0; i < AF_XDP_TX_RETRIES && n < n_tx; i++)
261 af_xdp_device_output_free (vm, node, txq);
263 af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from + n);
/* publish everything enqueued and wake the kernel if needed */
267 af_xdp_device_output_tx_db (vm, node, ad, txq, n);
270 clib_spinlock_unlock (&txq->lock);
/* anything still not enqueued after all retries is dropped and counted */
272 if (PREDICT_FALSE (n != n_tx))
274 vlib_buffer_free (vm, from + n, n_tx - n);
275 vlib_error_count (vm, node->node_index,
276 AF_XDP_TX_ERROR_NO_FREE_SLOTS, n_tx - n);
283 * fd.io coding-style-patch-verification: ON
286 * eval: (c-set-style "gnu")