/*-
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of 6WIND S.A. nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
	int dummy;
};
struct ibv_flow_spec_counter_action {
	int dummy;
};
struct ibv_counter_set {
	int dummy;
};
static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif
/* Dev ops structures defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /**< Parser context. */
	struct rte_flow_error *error; /**< Error context. */
};
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
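
/*
 * Illustration only (not part of the original driver): how a lookup in
 * hash_rxq_init[] can map a DPDK RSS hash field set to the corresponding
 * Verbs hash fields. A minimal sketch; the helper name is hypothetical.
 */
static inline uint64_t
example_hash_fields_for(uint64_t rss_hf)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
			return hash_rxq_init[i].hash_fields;
	return 0; /* No protocol-specific hashing possible for this rss_hf. */
}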
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
/* Flows structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
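
/*
 * Illustration only: ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an END-terminated list of the items allowed at a given position.
 */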
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC-specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
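
/*
 * Illustration only: how the graph above constrains patterns. Starting
 * from RTE_FLOW_ITEM_TYPE_END, each item lists its valid successors in
 * .items, so a pattern such as
 *
 *	eth / ipv4 / udp / vxlan / eth / ipv4
 *
 * is walkable (ETH -> IPV4 -> UDP -> VXLAN -> ETH -> IPV4), whereas
 *
 *	eth / vlan / udp
 *
 * is rejected because VLAN only lists IPV4 and IPV6 as successors.
 */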
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
/**
 * Check whether an item is fully supported by the NIC matching capability.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   items.
 * @param[in] size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	unsigned int i;
	const uint8_t *spec = item->spec;
	const uint8_t *last = item->last;
	const uint8_t *m = item->mask ? item->mask : mask;

	if (!spec && (item->mask || last))
		goto error;
	if (!spec)
		return 0;
	/*
	 * Single-pass check to make sure that:
	 * - item->mask is supported, no bits are set outside mask.
	 * - Both masked item->spec and item->last are equal (no range
	 *   supported).
	 */
	for (i = 0; i < size; i++) {
		if ((m[i] | mask[i]) != mask[i])
			goto error;
		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
			goto error;
	}
	return 0;
error:
	rte_errno = ENOTSUP;
	return -rte_errno;
}
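
/*
 * Worked example (illustration only): with a supported mask byte of
 * 0x0f, m = 0x0f, spec = 0x0a and last = 0x1a pass the checks above:
 * (0x0f | 0x0f) == 0x0f and both masked bounds equal 0x0a. With
 * m = 0xf0 the first test fails, since (0xf0 | 0x0f) != 0x0f.
 */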
/**
 * Copy the RSS configuration from the user's one; if rss_conf is null,
 * use the driver's one.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 * @param[in] rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	/*
	 * This function is also called at the beginning of
	 * mlx5_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	 */
	if (rss_conf) {
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (rss_conf->rss_key_len != 40) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (rss_conf->rss_key_len && rss_conf->rss_key) {
			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
			memcpy(parser->rss_key, rss_conf->rss_key,
			       rss_conf->rss_key_len);
			parser->rss_conf.rss_key = parser->rss_key;
		}
		parser->rss_conf.rss_hf = rss_conf->rss_hf;
	}
	return 0;
}
/**
 * Extract flow rule attributes into the parser.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}
/**
 * Extract the actions request into the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	/*
	 * Add the default RSS configuration necessary for Verbs to create a
	 * QP even if no RSS is requested.
	 */
	ret = mlx5_flow_convert_rss_conf(parser,
					 (const struct rte_eth_rss_conf *)
					 &priv->rss_conf);
	if (ret)
		return ret;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			parser->queues_n = 1;
			parser->queues[0] = queue->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			if (rss->num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "too many queues for RSS"
						   " context");
				return -rte_errno;
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->counter_set_supported) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate items.
 *
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret = 0;

	/* Initialise the offsets to start after the Verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token) {
			ret = -ENOTSUP;
			goto exit_item_not_supported;
		}
		cur_item = token;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (ret)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop)
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	if (parser->mark)
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
}
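
/*
 * Worked example (illustration only) of the offset accounting above:
 * for a pattern eth / ipv4 / udp with no drop/mark/count action, each
 * parser->queue[i].offset ends up as
 *
 *	sizeof(struct ibv_flow_attr) +
 *	sizeof(struct ibv_flow_spec_eth) +
 *	sizeof(struct ibv_flow_spec_ipv4_ext) +
 *	sizeof(struct ibv_flow_spec_tcp_udp)
 *
 * which is exactly the buffer size later handed to
 * mlx5_flow_convert_allocate().
 */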
/**
 * Allocate memory space to store Verbs flow attributes.
 *
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A Verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes");
		return NULL;
	}
	return ibv_attr;
}
/**
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
{
	unsigned int i;

	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		return;
	}
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				attr->priority +
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
		}
	}
}
/**
 * Finalise Verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	/* Remove any other flow not matching the pattern. */
	if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (i == HASH_RXQ_ETH)
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
		return;
	}
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defined below
		 * supersedes it.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flows according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in Verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = mlx5_flow_convert_attributes(attr, error);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	if (ret)
		return ret;
	mlx5_flow_convert_finalise(parser);
	/*
	 * Second step.
	 * Allocate the memory space to store Verbs specifications.
	 */
	if (parser->drop) {
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return -rte_errno;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Parse the items and fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {
			.parser = parser,
			.error = error,
		};

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					&data);
		if (ret)
			goto exit_free;
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specifications to reach the RSS
	 * configuration.
	 */
	if (!parser->drop)
		mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
	return -rte_errno;
}
/**
 * Copy the created specification into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	return 0;
}
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			/*
			 * From the Verbs perspective an empty VLAN is
			 * equivalent to a packet without a VLAN layer.
			 */
			if (!eth->mask.vlan_tag)
				goto error;
		}
		return 0;
	}
error:
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, "VLAN cannot be empty");
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
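
/*
 * Worked example (illustration only) of the VNI packing above: for a
 * VNI of "\x12\x34\x56", bytes 1..3 of the union are filled, so
 * tunnel_id ends up holding the byte sequence 00 12 34 56, i.e. the
 * 24-bit VNI in network byte order padded with a leading zero byte.
 */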
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return -rte_errno;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_get(dev,
				      parser->rss_conf.rss_key,
				      parser->rss_conf.rss_key_len,
				      hash_fields,
				      parser->queues,
				      parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_new(dev,
				      parser->rss_conf.rss_key,
				      parser->rss_conf.rss_key_len,
				      hash_fields,
				      parser->queues,
				      parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  NULL,
						  "cannot create hash rxq");
		}
	}
	return 0;
}
/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv __rte_unused = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
			dev->data->port_id,
			(void *)flow, i,
			(void *)flow->frxq[i].hrxq,
			(void *)flow->frxq[i].ibv_flow);
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Create a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	if (fdq)
		rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
					      (*flow->queues),
					      flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
					      (*flow->queues),
					      flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
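
/*
 * Illustrative sketch, not part of the driver: enabling a unicast
 * MAC+VLAN control flow the way the traffic restoration code does.
 * The helper name example_enable_mac_vlan() is hypothetical; the VLAN
 * mask value matches the whole TCI, and rte_cpu_to_be_16() is assumed
 * reachable through the existing includes.
 */
static int __rte_unused
example_enable_mac_vlan(struct rte_eth_dev *dev,
			const struct ether_addr *mac, uint16_t vlan_id)
{
	struct rte_flow_item_eth eth_spec = { .type = 0 };
	struct rte_flow_item_eth eth_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_vlan vlan_spec = {
		.tci = rte_cpu_to_be_16(vlan_id),
	};
	struct rte_flow_item_vlan vlan_mask = {
		.tci = 0xffff,
	};

	memcpy(&eth_spec.dst, mac, sizeof(*mac));
	return mlx5_ctrl_flow_vlan(dev, &eth_spec, &eth_mask,
				   &vlan_spec, &vlan_mask);
}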

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
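
/*
 * Illustrative sketch, not part of the driver: accepting broadcast
 * traffic through the VLAN-less wrapper; spec and mask are identical so
 * only the broadcast address matches.  example_enable_broadcast() is a
 * hypothetical name.
 */
static int __rte_unused
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}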

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Last counter snapshot; used to compute the delta and updated on reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, re-base the next query on the current snapshot. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
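
/*
 * Illustrative sketch, not part of the driver: an application reaches
 * mlx5_flow_query_count() above through rte_flow_query(); in this DPDK
 * revision the queried action is designated by its type.
 * example_read_counter() is a hypothetical name and assumes PRIu64 from
 * <inttypes.h> is available through the existing includes.
 */
static int __rte_unused
example_read_counter(uint16_t port_id, struct rte_flow *flow)
{
	struct rte_flow_query_count query = { .reset = 1 };
	struct rte_flow_error error;
	int ret;

	ret = rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
			     &query, &error);
	if (!ret && query.hits_set)
		DRV_LOG(DEBUG, "flow hits %" PRIu64 " bytes %" PRIu64,
			query.hits, query.bytes);
	return ret;
}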

/**
 * Query an existing flow rule.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Enable or disable isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (priv->isolated)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
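
/*
 * Illustrative sketch, not part of the driver: isolated mode must be
 * requested while the port is stopped, typically right after
 * configuration.  example_enter_isolated_mode() is a hypothetical name.
 */
static int __rte_unused
example_enter_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;

	/* Fails with EBUSY once the port has been started. */
	return rte_flow_isolate(port_id, 1, &error);
}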

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		/* No L4 item for "other" flow types. */
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
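
/*
 * Illustrative sketch, not part of the driver: a perfect-match IPv4/UDP
 * flow director filter such as this one is turned by
 * mlx5_fdir_filter_convert() above into ETH/IPV4/UDP items plus a QUEUE
 * action.  example_fdir_filter() is a hypothetical helper; addresses and
 * ports are assumed to be in network byte order as the FDIR API expects,
 * and the IPv4() macro comes from rte_ip.h.
 */
static void __rte_unused
example_fdir_filter(struct rte_eth_fdir_filter *f, uint16_t rx_queue)
{
	memset(f, 0, sizeof(*f));
	f->input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
	f->input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 1));
	f->input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 2));
	f->input.flow.udp4_flow.src_port = rte_cpu_to_be_16(1234);
	f->input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(5678);
	f->action.behavior = RTE_ETH_FDIR_ACCEPT;
	f->action.rx_queue = rx_queue;
}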

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;
		unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
						      parser.layer;

		attr = parser.queue[queue_id].ibv_attr;
		flow_attr = flow->frxq[queue_id].ibv_attr;
		/* Compare first the attributes. */
		if (!flow_attr ||
		    memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
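
/*
 * Illustrative sketch, not part of the driver: an application reaches
 * mlx5_fdir_ctrl_func() above through the legacy filter-control API.
 * Builds on the hypothetical example_fdir_filter() helper defined
 * earlier.
 */
static int __rte_unused
example_fdir_add(uint16_t port_id, uint16_t rx_queue)
{
	struct rte_eth_fdir_filter f;

	example_fdir_filter(&f, rx_queue);
	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, &f);
}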

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
}
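
/*
 * Illustrative sketch, not part of the driver: the rte_flow layer
 * retrieves the driver callbacks through the RTE_ETH_FILTER_GENERIC /
 * RTE_ETH_FILTER_GET pair handled above.  example_get_flow_ops() is a
 * hypothetical name.
 */
static int __rte_unused
example_get_flow_ops(struct rte_eth_dev *dev,
		     const struct rte_flow_ops **ops)
{
	return mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, ops);
}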