/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/* Placeholder definitions used when Verbs lacks counter set support. */
struct ibv_counter_set_init_attr {
	int dummy;
};
struct ibv_flow_spec_counter_action {
	int dummy;
};
struct ibv_counter_set {
	int dummy;
};
static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queues indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
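
/*
 * Editorial illustration (not part of the original code): ITEMS() builds a
 * compound-literal array terminated by RTE_FLOW_ITEM_TYPE_END. For example,
 * ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands to:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * which gives the graph below an END-terminated list of the items allowed
 * to follow the current one.
 */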

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
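
/*
 * Editorial note: in the checks above, "mask" is the bit-mask the PMD
 * supports, so (spec[i] | mask[i]) != mask[i] catches any spec/last byte
 * that sets a bit the hardware cannot match on. The final memcmp() rejects
 * spec/last ranges: once the applied mask is taken into account, spec and
 * last must be identical, i.e. ranging is not supported by this driver.
 */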

/**
 * Copy the RSS configuration from the user ones; if rss_conf is NULL,
 * use the driver default one.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
			   struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	/*
	 * This function is also called at the beginning of
	 * priv_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	 */
	(void)priv;
	if (rss_conf) {
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
			return EINVAL;
		if (rss_conf->rss_key_len != 40)
			return EINVAL;
		if (rss_conf->rss_key_len && rss_conf->rss_key) {
			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
			memcpy(parser->rss_key, rss_conf->rss_key,
			       rss_conf->rss_key_len);
			parser->rss_conf.rss_key = parser->rss_key;
		}
		parser->rss_conf.rss_hf = rss_conf->rss_hf;
	}
	return 0;
}

/**
 * Extract attribute to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
			     const struct rte_flow_attr *attr,
			     struct rte_flow_error *error,
			     struct mlx5_flow_parse *parser)
{
	(void)priv;
	(void)parser;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}

/**
 * Extract actions request to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	priv_flow_convert_rss_conf(priv, parser,
				   (const struct rte_eth_rss_conf *)
				   &priv->rss_conf);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (priv_flow_convert_rss_conf(priv, parser,
						       rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->counter_set_supported) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;

	(void)priv;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	}
	if (parser->mark) {
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	}
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
			   unsigned int priority,
			   unsigned int size,
			   struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes.");
		return NULL;
	}
	ibv_attr->priority = priority;
	return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	(void)priv;
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = priv_flow_convert_attributes(priv, attr, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_actions(priv, actions, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_items_validate(priv, items, error, parser);
	if (ret)
		return ret;
	priv_flow_convert_finalise(priv, parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].ibv_attr =
			priv_flow_convert_allocate
			(priv, attr->priority,
			 parser->queue[HASH_RXQ_ETH].offset,
			 error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return ENOMEM;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int priority =
				attr->priority +
				hash_rxq_init[i].flow_priority;
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				priv_flow_convert_allocate(priv, priority,
							   offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					parser);
		if (ret) {
			rte_flow_error_set(error, ret,
					   RTE_FLOW_ERROR_TYPE_ITEM,
					   items, "item not supported");
			goto exit_free;
		}
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(priv, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (!parser->drop) {
		priv_flow_convert_finalise(priv, parser);
	} else {
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[parser->layer].flow_priority;
	}
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes.");
	return ret;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter.");
	return rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same l3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
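
/*
 * Editorial note: each enabled hash Rx queue type keeps its own copy of the
 * Verbs attribute, and the helper above appends the same spec to every
 * compatible copy, bumping num_of_specs and offset for each. Compatibility
 * means same IP version, no IP version at all (the HASH_RXQ_ETH entry), or
 * no layer detected yet; e.g. an IPv4 spec lands in the TCPv4, UDPv4 and
 * IPv4 attributes but not in the IPv6 ones.
 */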

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		/* The VLAN TCI is written into the Ethernet spec
		 * copied just before this item. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;

			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
		}
	}
	return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		/* Bug fix: the spec values must be filled as well, otherwise
		 * the masking below always yields zero for these fields. */
		ipv6.val.flow_label = spec->hdr.vtc_flow;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		ipv6.mask.flow_label = mask->hdr.vtc_flow;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return EINVAL;
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
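
/*
 * Editorial illustration: the union above places the 3-byte network-order
 * VNI into bytes 1..3 of a 32-bit word whose byte 0 is zeroed, which is the
 * layout Verbs expects for tunnel_id. For example a VNI of 42, carried on
 * the wire as { 0x00, 0x00, 0x2a }, yields the byte sequence
 * { 0x00, 0x00, 0x00, 0x2a } in id.vni[], i.e. the value 42 when the word
 * is read in network byte order.
 */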

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
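
/*
 * Editorial note: mlx5_flow_mark_set() (defined alongside the Rx path
 * helpers in mlx5_rxtx.h) encodes the user mark before it is written into
 * the tag; in this driver the value is offset so that a zero read back on
 * the receive side can still mean "no mark". Treat the exact encoding as an
 * implementation detail shared with the Rx completion handling.
 */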

/**
 * Convert count action to Verbs specification.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
mlx5_flow_create_count(struct priv *priv __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs)
		return EINVAL;
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	int err = 0;

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_get(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_new(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "cannot create hash rxq");
			return ENOMEM;
		}
	}
	return 0;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	int err = 0;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
	if (err)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			err = ENOMEM;
			goto error;
		}
		DEBUG("%p type %d QP %p ibv_flow %p",
		      (void *)flow, i,
		      (void *)flow->frxq[i].hrxq,
		      (void *)flow->frxq[i].ibv_flow);
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Create a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int err;

	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
	if (err)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
							 error);
	else
		err = priv_flow_create_action_queue(priv, &parser, flow, error);
	if (err)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DEBUG("Flow created %p", (void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse parser = { .create = 0, };

	priv_lock(priv);
	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
	priv_unlock(priv);
	return ret;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
				error);
	priv_unlock(priv);
	return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_priv_hrxq_release(priv, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		priv_flow_destroy(priv, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	/* Guard against a NULL fdq before releasing resources. */
	if (fdq) {
		if (fdq->qp)
			claim_zero(ibv_destroy_qp(fdq->qp));
		if (fdq->ind_table)
			claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
		if (fdq->wq)
			claim_zero(ibv_destroy_wq(fdq->wq));
		if (fdq->cq)
			claim_zero(ibv_destroy_cq(fdq->cq));
		rte_free(fdq);
	}
	priv->flow_drop_queue = NULL;
	return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DEBUG("Flow %p removed", (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n)
		return EINVAL;
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
				&error);
	if (!flow)
		return rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
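
/*
 * Usage sketch (editorial, not part of the original file): the driver
 * start-up path typically enables a broadcast control flow like this; an
 * all-ones destination MAC in both spec and mask matches broadcast frames
 * and steers them through the device RSS configuration:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	claim_zero(mlx5_ctrl_flow(dev, &bcast, &bcast));
 */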

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, &priv->flows, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv, &priv->flows);
	priv_unlock(priv);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_value
 *   Returned data from the counter.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int res = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (res) {
		rte_flow_error_set(error, -res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot read counter");
		return -res;
	}
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* Reset counters. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
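
/*
 * Editorial note: the hardware counter is cumulative, so the helper above
 * reports deltas against the last snapshot kept in counter_stats. For
 * example, if the raw hit counter reads 1000 and the stored snapshot is
 * 900, the query returns 100; with query_count->reset set, the snapshot
 * then becomes 1000 so the next query starts counting from zero again.
 */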

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int res = EINVAL;

	priv_lock(priv);
	if (flow->cs) {
		res = priv_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
	} else {
		rte_flow_error_set(error, res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "no counter found for flow");
	}
	priv_unlock(priv);
	return -res;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	priv_unlock(priv);
	return 0;
}
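
/*
 * Usage sketch (editor's illustration, not compiled): isolated mode can
 * only be toggled on a stopped port, which is why the function above
 * rejects started devices with EBUSY.  The expected application sequence
 * is:
 *
 *	rte_eth_dev_stop(port_id);
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		rte_exit(EXIT_FAILURE, "cannot enter isolated mode\n");
 *	rte_eth_dev_start(port_id);
 *
 * Once enabled, only traffic matching explicit flow rules reaches the
 * application; the alternate mlx5_dev_ops_isolate table is installed to
 * reflect that.
 */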
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
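
/*
 * Example mapping (editor's illustration, not compiled): a perfect-mode
 * filter that steers NONFRAG_IPV4_UDP traffic for a given destination
 * address and port to RX queue 3 would come in roughly as:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.input.flow.udp4_flow.ip.dst_ip = dst_ip_be,
 *		.input.flow.udp4_flow.dst_port = dst_port_be,
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.action.rx_queue = 3,
 *	};
 *
 * and leave this function as the generic rule items[] = { ETH, IPV4, UDP,
 * END } with actions[] = { QUEUE(3), END }.  dst_ip_be and dst_port_be are
 * hypothetical big-endian values, not identifiers from this file.
 */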
/**
 * Add a new flow director filter and store it in the flow list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return -ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}
/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing and must be appended manually.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return -ret;
}
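
/*
 * Memory layout being compared above (editor's sketch): a Verbs flow
 * attribute is immediately followed in memory by its specifications, each
 * of which begins with a struct ibv_spec_header carrying its type and
 * size:
 *
 *	+---------------+---------------+---------------+-----
 *	| ibv_flow_attr | spec[0]       | spec[1]       | ...
 *	| num_of_specs  | header + body | header + body |
 *	+---------------+---------------+---------------+-----
 *
 * The loop walks both buffers spec by spec, advancing by each header's
 * size field, and declares a match only when every specification of the
 * rebuilt filter compares equal to the stored flow's.
 */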
/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}
/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
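
/*
 * Usage sketch (editor's illustration, not compiled): this entry point
 * serves two clients.  rte_flow itself resolves the driver callbacks
 * through the GENERIC filter type:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *
 * while legacy flow director users reach the priv_fdir_* helpers with
 * RTE_ETH_FILTER_FDIR, an operation such as RTE_ETH_FILTER_ADD, and a
 * struct rte_eth_fdir_filter as the argument.
 */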