/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_rxtx.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set {
	int dummy; /* Placeholder when Verbs counters are unsupported. */
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif
/* Dev ops structure defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /** Parser context. */
	struct rte_flow_error *error; /** Error context. */
};

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
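
/*
 * Illustration (not part of the original code): a flow using RSS hash type
 * ETH_RSS_NONFRAG_IPV4_UDP selects the HASH_RXQ_UDPV4 entry above, i.e.
 * Verbs hashing on source/destination IPv4 addresses and UDP ports:
 *
 *	const struct hash_rxq_init *init = &hash_rxq_init[HASH_RXQ_UDPV4];
 *	assert(init->dpdk_rss_hf == ETH_RSS_NONFRAG_IPV4_UDP);
 */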
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queues indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
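
/*
 * Example (illustrative): ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to a compound literal equivalent to:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4,
 *		RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 */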
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
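
/*
 * Illustration (not part of the original code): the graph above accepts a
 * pattern such as "eth / ipv4 / udp / vxlan / eth / ipv4" because each item
 * appears in the .items list of its predecessor, starting from the
 * RTE_FLOW_ITEM_TYPE_END entry.
 */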
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
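
/*
 * Illustration (not part of the original code): applications reach these
 * callbacks through the generic rte_flow API, e.g.:
 *
 *	struct rte_flow_error err;
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * which lands in mlx5_flow_create() below once ethdev dispatches the call
 * to this PMD.
 */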
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};
/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
/**
 * Check that the item is fully supported by the NIC matching capability.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	unsigned int i;
	const uint8_t *spec = item->spec;
	const uint8_t *last = item->last;
	const uint8_t *m = item->mask ? item->mask : mask;

	if (!spec && (item->mask || last))
		goto error;
	if (!spec)
		return 0;
	/*
	 * Single-pass check to make sure that:
	 * - item->mask is supported, no bits are set outside mask.
	 * - Both masked item->spec and item->last are equal (no range
	 *   supported).
	 */
	for (i = 0; i < size; i++) {
		if (!m[i])
			continue;
		if ((m[i] | mask[i]) != mask[i])
			goto error;
		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
			goto error;
	}
	return 0;
error:
	rte_errno = ENOTSUP;
	return -rte_errno;
}
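
/*
 * Example (illustrative): if a byte of the supported mask is 0xf0, a user
 * mask byte of 0x0f is rejected since (0x0f | 0xf0) != 0xf0; likewise
 * spec=0x12 with last=0x13 under mask 0xff is rejected because ranges are
 * not supported.
 */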
/**
 * Copy the RSS configuration from the user; if rss_conf is NULL, use the
 * driver default one.
 *
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	/*
	 * This function is also called at the beginning of
	 * mlx5_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	 */
	if (rss_conf) {
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (rss_conf->rss_key_len != 40) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		if (rss_conf->rss_key_len && rss_conf->rss_key) {
			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
			memcpy(parser->rss_key, rss_conf->rss_key,
			       rss_conf->rss_key_len);
			parser->rss_conf.rss_key = parser->rss_key;
		}
		parser->rss_conf.rss_hf = rss_conf->rss_hf;
	}
	return 0;
}
/**
 * Extract the flow rule attributes into the parser.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}
/**
 * Extract the actions request into the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	ret = mlx5_flow_convert_rss_conf(parser,
					 (const struct rte_eth_rss_conf *)
					 &priv->rss_conf);
	if (ret)
		return ret;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					RTE_FLOW_ERROR_TYPE_ACTION,
					actions,
					"queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						RTE_FLOW_ERROR_TYPE_ACTION,
						actions,
						"queue action not in RSS"
						" queues");
					return -rte_errno;
				}
			}
			if (rss->num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "too many queues for RSS"
						   " context");
				return -rte_errno;
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION,
						actions,
						"queue id > number of"
						" queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->counter_set_supported) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate items.
 *
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret = 0;

	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token) {
			ret = -ENOTSUP;
			goto exit_item_not_supported;
		}
		cur_item = token;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (ret)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	}
	if (parser->mark) {
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	}
	if (parser->count) {
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
#else
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
				   items,
				   "Count action supported only on "
				   "MLNX_OFED_4.2 and above");
		return -rte_errno;
#endif
	}
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes");
		return NULL;
	}
	return ibv_attr;
}
/**
 * Give inner packet matching a higher priority than non-inner (outer)
 * matching.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
{
	unsigned int i;

	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		return;
	}
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				attr->priority +
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
		}
	}
}
/**
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	/* Remove any other flow not matching the pattern. */
	if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (i == HASH_RXQ_ETH)
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
		return;
	}
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the more specific pattern
		 * defined below covers it.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}
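
/*
 * Illustration (not part of the original code): for a pattern "eth / ipv4"
 * with RSS enabled on TCP and UDP hash types, the IPv6 entries are released
 * above while HASH_RXQ_TCPV4/HASH_RXQ_UDPV4 get a wildcard TCP/UDP spec
 * appended, so every remaining hash Rx queue owns a complete Verbs
 * specification.
 */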
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = mlx5_flow_convert_attributes(attr, error);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	if (ret)
		return ret;
	mlx5_flow_convert_finalise(parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop) {
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return -rte_errno;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {
			.parser = parser,
			.error = error,
		};

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					&data);
		if (ret)
			goto exit_free;
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (!parser->drop)
		mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
	return -rte_errno;
}
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same l3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	return 0;
}
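
/*
 * Example (illustrative): an ETH item with spec dst=00:11:22:33:44:55 and
 * no mask falls back to rte_flow_item_eth_mask, which matches both MAC
 * addresses exactly while leaving ether_type unmatched.
 */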
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	unsigned int i;

	if (spec) {
		if (!mask)
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;

			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			/*
			 * From verbs perspective an empty VLAN is equivalent
			 * to a packet without VLAN layer.
			 */
			if (!eth->mask.vlan_tag)
				goto error;
		}
	}
	return 0;
error:
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, "VLAN cannot be empty");
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule. If it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
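
/*
 * Example (illustrative): VNI 0x000001 arrives as spec->vni =
 * {0x00, 0x00, 0x01}; with id.vni[0] = 0, the memcpy() above produces the
 * 32-bit big-endian tunnel_id 0x00000001 expected by the Verbs tunnel
 * specification.
 */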
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return -rte_errno;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_get(dev,
				      parser->rss_conf.rss_key,
				      parser->rss_conf.rss_key_len,
				      hash_fields,
				      parser->queues,
				      parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_new(dev,
				      parser->rss_conf.rss_key,
				      parser->rss_conf.rss_key_len,
				      hash_fields,
				      parser->queues,
				      parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  NULL,
						  "cannot create hash rxq");
		}
	}
	return 0;
}
/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv __rte_unused = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
			dev->data->port_id,
			(void *)flow, i,
			(void *)flow->frxq[i].hrxq,
			(void *)flow->frxq[i].ibv_flow);
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Convert a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
				&(struct ibv_wq_init_attr){
					.wq_type = IBV_WQT_RQ,
					.max_wr = 1,
					.max_sge = 1,
					.pd = priv->pd,
					.cq = fdq->cq,
				});
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
				.log_ind_tbl_size = 0,
				.ind_tbl = &fdq->wq,
				.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd
		});
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	if (fdq)
		rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
					      (*flow->queues),
					      flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
					      (*flow->queues),
					      flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
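/*
 * Illustrative usage (not part of this file): the device close path can use
 * the helper above to report flow rules the application failed to release.
 */
#if 0 /* example only */
	int ret = mlx5_flow_verify(dev);

	if (ret)
		DRV_LOG(WARNING, "port %u some flows still remain (%d)",
			dev->data->port_id, ret);
#endif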
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.mask = eth_mask,
		},
		{
			/* A VLAN item is only added when a spec is given. */
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	/* Spread the control traffic according to the RETA configuration. */
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
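/*
 * Illustrative usage (not part of this file): enabling broadcast traffic,
 * untagged and with a VLAN tag.  The VLAN ID below is an example value.
 */
#if 0 /* example only */
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_vlan vlan = {
		.tci = rte_cpu_to_be_16(100), /* Example VLAN ID. */
	};

	claim_zero(mlx5_ctrl_flow(dev, &bcast, &bcast));
	claim_zero(mlx5_ctrl_flow_vlan(dev, &bcast, &bcast, &vlan, &vlan));
#endif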
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Counter values saved at the last reset, used to compute deltas.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, the raw values become the new baseline. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
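/*
 * Worked example of the delta arithmetic above (values made up): with raw
 * device counters {hits 1000, bytes 64000} and saved stats {400, 25600},
 * the query reports hits = 600 and bytes = 38400, i.e. the traffic seen
 * since the last reset.  When query_count->reset is set, the raw values
 * {1000, 64000} become the new baseline, so the next query counts from
 * zero again.
 */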
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
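/*
 * Illustrative usage (not part of this file): isolated mode must be
 * selected while the port is stopped, typically right after
 * rte_eth_dev_configure() and before rte_eth_dev_start().
 */
#if 0 /* example only */
	struct rte_flow_error error;

	if (rte_flow_isolate(port_id, 1, &error))
		rte_panic("cannot enter isolated mode: %s\n", error.message);
#endif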
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		/* No L4 item for the "OTHER" flow types. */
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
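/*
 * Illustrative input (not part of this file): a perfect-match IPv4/UDP flow
 * director filter such as the one below is converted by the function above
 * into an ETH / IPV4 / UDP / END pattern with a QUEUE action.  Addresses,
 * ports and the queue index are examples only.
 */
#if 0 /* example only */
	struct rte_eth_fdir_filter fdir_filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow = {
				.ip = {
					.src_ip = rte_cpu_to_be_32(0xc0a80001),
					.dst_ip = rte_cpu_to_be_32(0xc0a80002),
				},
				.src_port = rte_cpu_to_be_16(1234),
				.dst_port = rte_cpu_to_be_16(5678),
			},
		},
		.action = {
			.rx_queue = 3,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};
#endif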
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;
		unsigned int queue_id;

		/*
		 * Search for a non-empty ibv_attr. There should be only one
		 * because no RSS action is allowed for FDIR. This should have
		 * been referenced directly by parser.layer but due to a bug in
		 * mlx5_flow_convert() as of v17.11.4, parser.layer isn't
		 * correct. This bug will have to be addressed later.
		 */
		for (queue_id = 0; queue_id != hash_rxq_init_n; ++queue_id) {
			attr = parser.queue[queue_id].ibv_attr;
			if (attr)
				break;
		}
		assert(!parser.drop || queue_id == HASH_RXQ_ETH);
		flow_attr = flow->frxq[queue_id].ibv_attr;
		/* Compare first the attributes. */
		if (!flow_attr ||
		    memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
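/*
 * Note on the matching loop above: two rules are considered identical when
 * their ibv_flow_attr headers compare equal and every following
 * specification, each prefixed by an ibv_spec_header, compares equal over
 * the smaller of the two sizes.  Sketch of the memory layout being walked
 * (sizes are examples only):
 *
 *   ibv_flow_attr | eth spec (size 40) | ipv4 spec (size 16) | udp spec ...
 *                 ^ spec/flow_spec advance by attr_h->size/flow_h->size
 */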
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* An update is a delete followed by an add. */
	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	/* Flexible payload is not supported by this PMD. */
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
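/*
 * Illustrative usage (not part of this file): flow director requests reach
 * the dispatcher above through the legacy filter control API, e.g.:
 */
#if 0 /* example only */
	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				    RTE_ETH_FILTER_ADD, &fdir_filter))
		rte_panic("cannot add flow director filter\n");
#endif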
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
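/*
 * Illustrative usage (not part of this file): the rte_flow layer itself
 * retrieves the driver callbacks through the GENERIC filter type, which is
 * what the first case above serves.  Sketch:
 */
#if 0 /* example only */
	const struct rte_flow_ops *ops;

	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, &ops))
		ops = NULL; /* rte_flow not supported on this port. */
#endif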