2 *------------------------------------------------------------------
3 * Copyright (c) 2018 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
21 #include <infiniband/verbs.h>
23 #include <vlib/pci/pci.h>
24 #include <vnet/interface.h>
25 #include <vnet/ethernet/mac_address.h>
26 #include <rdma/rdma_mlx5dv.h>
/* Per-device state/capability flag table: _(bit, NAME, "printable-name").
 * Expanded below into RDMA_DEVICE_F_* bit masks, and presumably also by a
 * flag formatter elsewhere (the usual VPP foreach pattern) — the third
 * column is the human-readable name. */
#define foreach_rdma_device_flags \
  _(0, ERROR, "error") \
  _(1, ADMIN_UP, "admin-up") \
  _(2, LINK_UP, "link-up") \
  _(3, PROMISC, "promiscuous") \
  _(4, MLX5DV, "mlx5dv") \
  _(5, STRIDING_RQ, "striding-rq")
/* Expand the flag table into RDMA_DEVICE_F_<NAME> = (1 << bit) constants;
 * the enclosing enum declaration is not visible in this chunk. */
#define _(a, b, c) RDMA_DEVICE_F_##b = (1 << a),
  foreach_rdma_device_flags
/* Fallback for rdma-core releases that do not expose this constant.
 * NOTE(review): 18 presumably covers the 14-byte Ethernet header plus a
 * 4-byte VLAN tag inlined into the TX WQE eth segment — confirm against
 * the mlx5 provider headers. */
#ifndef MLX5_ETH_L2_INLINE_HEADER_SIZE
#define MLX5_ETH_L2_INLINE_HEADER_SIZE 18
49 CLIB_ALIGN_MARK (align0, MLX5_SEND_WQE_BB);
52 struct mlx5_wqe_ctrl_seg ctrl;
61 struct mlx5_wqe_eth_seg eseg;
62 struct mlx5_wqe_data_seg dseg;
/* Size in bytes of one TX work-queue entry, and the same size expressed
 * as a count of 16-byte mlx5_wqe_data_seg units (the static assert just
 * below checks the division is exact). */
#define RDMA_MLX5_WQE_SZ sizeof(rdma_mlx5_wqe_t)
#define RDMA_MLX5_WQE_DS (RDMA_MLX5_WQE_SZ/sizeof(struct mlx5_wqe_data_seg))
66 STATIC_ASSERT (RDMA_MLX5_WQE_SZ == MLX5_SEND_WQE_BB &&
67 RDMA_MLX5_WQE_SZ % sizeof (struct mlx5_wqe_data_seg) == 0,
72 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
85 mlx5dv_wqe_ds_t *wqes;
86 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
98 u32 striding_wqe_tail; /* Striding RQ: number of released whole WQE */
99 u8 log_stride_per_wqe; /* Striding RQ: number of strides in a single WQE */
104 u8 *n_used_per_chain; /* Legacy RQ: for each buffer chain, how many additional segments are needed */
106 u32 *second_bufs; /* Legacy RQ: ring of second buffers of each chain */
107 u32 incomplete_tail; /* Legacy RQ: tail index in bufs,
108 corresponds to buffer chains with recycled valid head buffer,
109 but whose other buffers are not yet recycled (due to pool exhaustion). */
110 u16 n_total_additional_segs;
111 u8 n_ds_per_wqe; /* Legacy RQ: number of nonnull data segs per WQE */
114 u8 log_wqe_sz; /* log-size of a single WQE (in data segments) */
119 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
121 /* following fields are accessed in datapath */
122 clib_spinlock_t lock;
128 /* ibverb datapath. Cache of cq, sq below */
129 struct ibv_cq *ibv_cq;
130 struct ibv_qp *ibv_qp;
134 /* direct verbs datapath */
135 rdma_mlx5_wqe_t *dv_sq_wqes;
136 volatile u32 *dv_sq_dbrec;
137 volatile u64 *dv_sq_db;
138 struct mlx5_cqe64 *dv_cq_cqes;
139 volatile u32 *dv_cq_dbrec;
143 u32 *bufs; /* vlib_buffer ring buffer */
146 u16 dv_cq_idx; /* monotonic CQE index (valid only for direct verbs) */
147 u8 bufs_log2sz; /* log2 vlib_buffer entries */
148 u8 dv_sq_log2sz:4; /* log2 SQ WQE entries (valid only for direct verbs) */
149 u8 dv_cq_log2sz:4; /* log2 CQ CQE entries (valid only for direct verbs) */
150 STRUCT_MARK (cacheline1);
152 /* WQE template (valid only for direct verbs) */
155 /* end of 2nd 64-bytes cacheline (or 1st 128-bytes cacheline) */
156 STRUCT_MARK (cacheline2);
158 /* fields below are not accessed in datapath */
163 STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline1, 64);
164 STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline2, 128);
/* Sentinel id — presumably marks a direct-verbs WQE with no associated
 * buffer to free on completion; TODO confirm against the TX path. */
#define RDMA_TXQ_DV_INVALID_ID 0xffffffff

/* Ring sizes derived from the log2 fields stored in rdma_txq_t. */
#define RDMA_TXQ_BUF_SZ(txq) (1U << (txq)->bufs_log2sz)
#define RDMA_TXQ_DV_SQ_SZ(txq) (1U << (txq)->dv_sq_log2sz)
#define RDMA_TXQ_DV_CQ_SZ(txq) (1U << (txq)->dv_cq_log2sz)

/* Ring occupancy/free-slot helpers. head and tail are free-running
 * counters; the u16 casts make the subtraction wrap correctly modulo
 * 2^16, so these are valid as long as the ring size is <= 65536. */
#define RDMA_TXQ_USED_SZ(head, tail) ((u16)((u16)(tail) - (u16)(head)))
#define RDMA_TXQ_AVAIL_SZ(txq, head, tail) ((u16)(RDMA_TXQ_BUF_SZ (txq) - RDMA_TXQ_USED_SZ (head, tail)))

/* Maximum RX buffer-chain length (as a power of two, then linear).
 * Striding-RQ code depends on the >= 3 lower bound noted below. */
#define RDMA_RXQ_MAX_CHAIN_LOG_SZ 3 /* This should NOT be lower than 3! */
#define RDMA_RXQ_MAX_CHAIN_SZ (1U << RDMA_RXQ_MAX_CHAIN_LOG_SZ)
/* Legacy (non-striding) RQ uses a separate, linear chain limit. */
#define RDMA_RXQ_LEGACY_MODE_MAX_CHAIN_SZ 5
196 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
198 /* following fields are accessed in datapath */
202 u32 per_interface_next_index;
205 u32 lkey; /* cache of mr->lkey */
206 u8 pool; /* buffer pool index */
208 /* fields below are not accessed in datapath */
209 vlib_pci_device_info_t *pci;
212 mac_address_t hwaddr;
213 u32 async_event_clib_file_index;
218 struct ibv_context *ctx;
221 struct ibv_qp *rx_qp4;
222 struct ibv_qp *rx_qp6;
223 struct ibv_rwq_ind_table *rx_rwq_ind_tbl;
224 struct ibv_flow *flow_ucast4;
225 struct ibv_flow *flow_mcast4;
226 struct ibv_flow *flow_ucast6;
227 struct ibv_flow *flow_mcast6;
234 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
237 u16 cqe_flags[VLIB_FRAME_SIZE];
238 u16x8 cqe_flags8[VLIB_FRAME_SIZE / 8];
239 u16x16 cqe_flags16[VLIB_FRAME_SIZE / 16];
245 u32 current_segs[VLIB_FRAME_SIZE];
246 u32 to_free_buffers[VLIB_FRAME_SIZE];
247 }; /* Specific to STRIDING RQ mode */
250 u32 tmp_bi[VLIB_FRAME_SIZE];
251 vlib_buffer_t *tmp_bufs[VLIB_FRAME_SIZE];
252 }; /* Specific to LEGACY RQ mode */
255 vlib_buffer_t buffer_template;
256 } rdma_per_thread_data_t;
260 rdma_per_thread_data_t *per_thread_data;
261 rdma_device_t *devices;
262 vlib_log_class_t log_class;
266 extern rdma_main_t rdma_main;
284 u8 disable_striding_rq;
293 } rdma_create_if_args_t;
/* Create an RDMA interface from the parsed CLI/API arguments; errors are
 * presumably reported through a field of args rather than a return value
 * (signature is void) — confirm against the implementation. */
void rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args);
/* Tear down an interface previously created by rdma_create_if. */
void rdma_delete_if (vlib_main_t * vm, rdma_device_t * rd);
/* Graph node and device class registered by the plugin (defined in the
 * corresponding .c files). */
extern vlib_node_registration_t rdma_input_node;
extern vnet_device_class_t rdma_device_class;

/* format/unformat helpers for CLI and tracing output. */
format_function_t format_rdma_device;
format_function_t format_rdma_device_name;
format_function_t format_rdma_input_trace;
format_function_t format_rdma_rxq;
unformat_function_t unformat_rdma_create_if_args;
312 } rdma_input_trace_t;
/* TX error counter table: _(NAME, "description"). Expanded below into the
 * RDMA_TX_ERROR_* enum; the strings are the per-interface error counter
 * names shown in "show errors". */
#define foreach_rdma_tx_func_error \
  _(SEGMENT_SIZE_EXCEEDED, "segment size exceeded") \
  _(NO_FREE_SLOTS, "no free tx slots") \
  _(SUBMISSION, "tx submission errors") \
  _(COMPLETION, "tx completion errors")
322 #define _(f,s) RDMA_TX_ERROR_##f,
323 foreach_rdma_tx_func_error
326 } rdma_tx_func_error_t;
328 #endif /* _RDMA_H_ */
331 * fd.io coding-style-patch-verification: ON
334 * eval: (c-set-style "gnu")