4 * Copyright 2017 6WIND S.A.
5 * Copyright 2017 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <rte_debug.h>
37 #include <rte_atomic.h>
38 #include <rte_ethdev.h>
39 #include <rte_malloc.h>
41 #include <rte_cycles.h>
43 #include "failsafe_private.h"
/*
 * Baseline device information of the fail-safe port.
 *
 * fs_dev_infos_get() copies this structure verbatim when no sub-device is
 * available, and otherwise uses its rx_offload_capa, tx_offload_capa and
 * flow_type_rss_offloads members as masks.
 *
 * Numeric limits are set to the widest values (UINT16_MAX / UINT32_MAX) or to
 * the fail-safe's own bounds (RTE_MAX_QUEUES_PER_PORT, FAILSAFE_MAX_ETHADDR).
 * TX offloads and RSS flow types are advertised as none (0x0).
 *
 * NOTE(review): every line of this listing carries a numeric prefix and some
 * source lines are missing. The two identical nb_seg_max / nb_mtu_seg_max
 * pairs presumably sit in two nested initialisers, and the DEV_RX_OFFLOAD_*
 * list presumably initialises rx_offload_capa - confirm against the pristine
 * file.
 */
45 static struct rte_eth_dev_info default_infos = {
46 /* Max possible number of elements */
47 .max_rx_pktlen = UINT32_MAX,
48 .max_rx_queues = RTE_MAX_QUEUES_PER_PORT,
49 .max_tx_queues = RTE_MAX_QUEUES_PER_PORT,
50 .max_mac_addrs = FAILSAFE_MAX_ETHADDR,
51 .max_hash_mac_addrs = UINT32_MAX,
52 .max_vfs = UINT16_MAX,
53 .max_vmdq_pools = UINT16_MAX,
58 .nb_seg_max = UINT16_MAX,
59 .nb_mtu_seg_max = UINT16_MAX,
65 .nb_seg_max = UINT16_MAX,
66 .nb_mtu_seg_max = UINT16_MAX,
69 * Set of capabilities that can be verified upon
70 * configuring a sub-device.
73 DEV_RX_OFFLOAD_VLAN_STRIP |
74 DEV_RX_OFFLOAD_QINQ_STRIP |
75 DEV_RX_OFFLOAD_IPV4_CKSUM |
76 DEV_RX_OFFLOAD_UDP_CKSUM |
77 DEV_RX_OFFLOAD_TCP_CKSUM |
78 DEV_RX_OFFLOAD_TCP_LRO,
79 .tx_offload_capa = 0x0,
80 .flow_type_rss_offloads = 0x0,
/*
 * dev_configure operation: propagate the fail-safe configuration to the
 * sub-devices.
 *
 * Visible steps, for each sub-device:
 *  - test whether it is in DEV_PROBED state (the action taken otherwise is
 *    not visible here - presumably the sub-device is skipped);
 *  - derive rmv_interrupt from the sub-device dev_flags and set
 *    intr_conf.rmv to 1 on one of the resulting branches;
 *  - compute lsc_interrupt: the application asked for LSC (lsc_enabled) and
 *    the sub-device advertises RTE_ETH_DEV_INTR_LSC; intr_conf.lsc is set to
 *    1, or forced to 0 when LSC was requested but is not supported;
 *  - call rte_eth_dev_configure() with the fail-safe queue counts and
 *    dev_conf;
 *  - register the RMV and LSC event callbacks, only while the matching
 *    sdev->rmv_callback / sdev->lsc_callback flag is still 0;
 *  - put intr_conf.rmv back to 0 and intr_conf.lsc back to lsc_enabled, so
 *    the per-sub-device tweaks do not leak into the next iteration;
 *  - mark the sub-device DEV_ACTIVE.
 * The fail-safe state is finally raised to DEV_ACTIVE if it was lower.
 *
 * NOTE(review): this listing omits the return type, braces, several
 * conditions and the error paths; check the pristine file for the exact
 * control flow and return values.
 */
84 fs_dev_configure(struct rte_eth_dev *dev)
86 struct sub_device *sdev;
90 FOREACH_SUBDEV(sdev, i, dev) {
91 int rmv_interrupt = 0;
92 int lsc_interrupt = 0;
95 if (sdev->state != DEV_PROBED)
98 rmv_interrupt = ETH(sdev)->data->dev_flags &
101 DEBUG("Enabling RMV interrupts for sub_device %d", i);
102 dev->data->dev_conf.intr_conf.rmv = 1;
104 DEBUG("sub_device %d does not support RMV event", i);
/* Remember what the application requested before altering dev_conf. */
106 lsc_enabled = dev->data->dev_conf.intr_conf.lsc;
107 lsc_interrupt = lsc_enabled &&
108 (ETH(sdev)->data->dev_flags &
109 RTE_ETH_DEV_INTR_LSC);
111 DEBUG("Enabling LSC interrupts for sub_device %d", i);
112 dev->data->dev_conf.intr_conf.lsc = 1;
113 } else if (lsc_enabled && !lsc_interrupt) {
114 DEBUG("Disabling LSC interrupts for sub_device %d", i);
115 dev->data->dev_conf.intr_conf.lsc = 0;
117 DEBUG("Configuring sub-device %d", i);
119 ret = rte_eth_dev_configure(PORT_ID(sdev),
120 dev->data->nb_rx_queues,
121 dev->data->nb_tx_queues,
122 &dev->data->dev_conf);
124 ERROR("Could not configure sub_device %d", i);
127 if (rmv_interrupt && sdev->rmv_callback == 0) {
128 ret = rte_eth_dev_callback_register(PORT_ID(sdev),
129 RTE_ETH_EVENT_INTR_RMV,
130 failsafe_eth_rmv_event_callback,
133 WARN("Failed to register RMV callback for sub_device %d",
136 sdev->rmv_callback = 1;
/* Undo the per-sub-device RMV request in the shared configuration. */
138 dev->data->dev_conf.intr_conf.rmv = 0;
139 if (lsc_interrupt && sdev->lsc_callback == 0) {
140 ret = rte_eth_dev_callback_register(PORT_ID(sdev),
141 RTE_ETH_EVENT_INTR_LSC,
142 failsafe_eth_lsc_event_callback,
145 WARN("Failed to register LSC callback for sub_device %d",
148 sdev->lsc_callback = 1;
/* Restore the LSC setting requested by the application. */
150 dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
151 sdev->state = DEV_ACTIVE;
153 if (PRIV(dev)->state < DEV_ACTIVE)
154 PRIV(dev)->state = DEV_ACTIVE;
/*
 * dev_start operation: start the sub-devices of the fail-safe port.
 *
 * Each sub-device is tested against DEV_ACTIVE (the action taken on mismatch
 * is not visible here - presumably it is skipped), started through
 * rte_eth_dev_start() and marked DEV_STARTED. The fail-safe state is then
 * raised to DEV_STARTED if it was lower, and fs_switch_dev(dev, NULL) is
 * called.
 *
 * NOTE(review): the handling of the rte_eth_dev_start() return value and
 * the function's own return statement are on lines missing from this listing.
 */
159 fs_dev_start(struct rte_eth_dev *dev)
161 struct sub_device *sdev;
165 FOREACH_SUBDEV(sdev, i, dev) {
166 if (sdev->state != DEV_ACTIVE)
168 DEBUG("Starting sub_device %d", i);
169 ret = rte_eth_dev_start(PORT_ID(sdev));
172 sdev->state = DEV_STARTED;
174 if (PRIV(dev)->state < DEV_STARTED)
175 PRIV(dev)->state = DEV_STARTED;
176 fs_switch_dev(dev, NULL);
/*
 * dev_stop operation: stop the fail-safe port and its sub-devices.
 *
 * The fail-safe state is first lowered to the state just below DEV_STARTED,
 * then every sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_STARTED)
 * is stopped with rte_eth_dev_stop() and lowered to that same state.
 */
181 fs_dev_stop(struct rte_eth_dev *dev)
183 struct sub_device *sdev;
186 PRIV(dev)->state = DEV_STARTED - 1;
187 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
188 rte_eth_dev_stop(PORT_ID(sdev));
189 sdev->state = DEV_STARTED - 1;
/*
 * dev_set_link_up operation: call rte_eth_dev_set_link_up() on every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A failure is logged with the sub-device index and the error code.
 * NOTE(review): the condition guarding the ERROR() and the return statements
 * are missing from this listing - presumably the first error is returned.
 */
194 fs_dev_set_link_up(struct rte_eth_dev *dev)
196 struct sub_device *sdev;
200 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
201 DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
202 ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
204 ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
205 " with error %d", i, ret);
/*
 * dev_set_link_down operation: call rte_eth_dev_set_link_down() on every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A failure is logged with the sub-device index and the error code.
 * NOTE(review): the condition guarding the ERROR() and the return statements
 * are missing from this listing - presumably the first error is returned.
 */
213 fs_dev_set_link_down(struct rte_eth_dev *dev)
215 struct sub_device *sdev;
219 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
220 DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
221 ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
223 ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
224 " with error %d", i, ret);
/* Forward declaration: fs_dev_close() needs it before its definition. */
231 static void fs_dev_free_queues(struct rte_eth_dev *dev);
/*
 * dev_close operation: shut the fail-safe port down.
 *
 * Order of the visible steps:
 *  1. cancel the hotplug alarm;
 *  2. if the port is still DEV_STARTED, stop it through its own dev_stop
 *     operation;
 *  3. lower the fail-safe state to the state just below DEV_ACTIVE;
 *  4. for each sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE):
 *     unregister its event callbacks, close it and lower its state likewise;
 *  5. release all fail-safe RX/TX queues.
 */
233 fs_dev_close(struct rte_eth_dev *dev)
235 struct sub_device *sdev;
238 failsafe_hotplug_alarm_cancel(dev);
239 if (PRIV(dev)->state == DEV_STARTED)
240 dev->dev_ops->dev_stop(dev);
241 PRIV(dev)->state = DEV_ACTIVE - 1;
242 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
243 DEBUG("Closing sub_device %d", i);
244 failsafe_eth_dev_unregister_callbacks(sdev);
245 rte_eth_dev_close(PORT_ID(sdev));
246 sdev->state = DEV_ACTIVE - 1;
248 fs_dev_free_queues(dev);
/*
 * rx_queue_release operation.
 *
 * Finds the owning fail-safe device through the queue's priv pointer, calls
 * the rx_queue_release operation of every sub-device selected by
 * FOREACH_SUBDEV_STATE(..., DEV_ACTIVE) on its queue of the same index
 * (rxq->qid), then clears the fail-safe rx_queues[] slot.
 *
 * NOTE(review): the declaration of rxq, its derivation from `queue`, any
 * NULL check and the release of the queue memory are on lines missing from
 * this listing.
 */
252 fs_rx_queue_release(void *queue)
254 struct rte_eth_dev *dev;
255 struct sub_device *sdev;
262 dev = rxq->priv->dev;
263 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
264 SUBOPS(sdev, rx_queue_release)
265 (ETH(sdev)->data->rx_queues[rxq->qid]);
266 dev->data->rx_queues[rxq->qid] = NULL;
/*
 * rx_queue_setup operation.
 *
 * Visible steps:
 *  - look up the queue currently stored at rx_queue_id, release it and clear
 *    the slot (the guarding condition is not visible - presumably a non-NULL
 *    test);
 *  - allocate a zeroed, cache-line aligned queue object whose size includes
 *    one rte_atomic64_t per sub-device slot (PRIV(dev)->subs_tail);
 *  - initialise one reference counter per sub-device;
 *  - record the queue index, socket, mempool, RX configuration and descriptor
 *    count so they can be replayed later, then publish the queue in
 *    dev->data->rx_queues[];
 *  - set the queue up on every sub-device selected by
 *    FOREACH_SUBDEV_STATE(..., DEV_ACTIVE); a failure is logged and the
 *    fail-safe queue is released again.
 *
 * NOTE(review): the nb_rx_desc parameter line, the allocation failure check
 * and the return statements are missing from this listing.
 */
271 fs_rx_queue_setup(struct rte_eth_dev *dev,
272 uint16_t rx_queue_id,
274 unsigned int socket_id,
275 const struct rte_eth_rxconf *rx_conf,
276 struct rte_mempool *mb_pool)
278 struct sub_device *sdev;
283 rxq = dev->data->rx_queues[rx_queue_id];
285 fs_rx_queue_release(rxq);
286 dev->data->rx_queues[rx_queue_id] = NULL;
288 rxq = rte_zmalloc(NULL,
290 sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
291 RTE_CACHE_LINE_SIZE);
294 FOREACH_SUBDEV(sdev, i, dev)
295 rte_atomic64_init(&rxq->refcnt[i]);
296 rxq->qid = rx_queue_id;
297 rxq->socket_id = socket_id;
298 rxq->info.mp = mb_pool;
299 rxq->info.conf = *rx_conf;
300 rxq->info.nb_desc = nb_rx_desc;
301 rxq->priv = PRIV(dev);
302 dev->data->rx_queues[rx_queue_id] = rxq;
303 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
304 ret = rte_eth_rx_queue_setup(PORT_ID(sdev),
306 nb_rx_desc, socket_id,
309 ERROR("RX queue setup failed for sub_device %d", i);
/* Error path: drop the partially set up fail-safe queue. */
315 fs_rx_queue_release(rxq);
/*
 * tx_queue_release operation.
 *
 * Finds the owning fail-safe device through the queue's priv pointer, calls
 * the tx_queue_release operation of every sub-device selected by
 * FOREACH_SUBDEV_STATE(..., DEV_ACTIVE) on its queue of the same index
 * (txq->qid), then clears the fail-safe tx_queues[] slot.
 *
 * NOTE(review): the declaration of txq, its derivation from `queue`, any
 * NULL check and the release of the queue memory are on lines missing from
 * this listing.
 */
320 fs_tx_queue_release(void *queue)
322 struct rte_eth_dev *dev;
323 struct sub_device *sdev;
330 dev = txq->priv->dev;
331 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
332 SUBOPS(sdev, tx_queue_release)
333 (ETH(sdev)->data->tx_queues[txq->qid]);
334 dev->data->tx_queues[txq->qid] = NULL;
/*
 * tx_queue_setup operation.
 *
 * Visible steps:
 *  - look up the queue currently stored at tx_queue_id, release it and clear
 *    the slot (the guarding condition is not visible - presumably a non-NULL
 *    test);
 *  - allocate a zeroed, cache-line aligned queue object (tagged
 *    "ethdev TX queue") whose size includes one rte_atomic64_t per
 *    sub-device slot (PRIV(dev)->subs_tail);
 *  - initialise one reference counter per sub-device;
 *  - record the queue index, socket, TX configuration and descriptor count,
 *    then publish the queue in dev->data->tx_queues[];
 *  - set the queue up on every sub-device selected by
 *    FOREACH_SUBDEV_STATE(..., DEV_ACTIVE); a failure is logged and the
 *    fail-safe queue is released again.
 *
 * NOTE(review): the nb_tx_desc parameter line, the allocation failure check
 * and the return statements are missing from this listing.
 */
339 fs_tx_queue_setup(struct rte_eth_dev *dev,
340 uint16_t tx_queue_id,
342 unsigned int socket_id,
343 const struct rte_eth_txconf *tx_conf)
345 struct sub_device *sdev;
350 txq = dev->data->tx_queues[tx_queue_id];
352 fs_tx_queue_release(txq);
353 dev->data->tx_queues[tx_queue_id] = NULL;
355 txq = rte_zmalloc("ethdev TX queue",
357 sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
358 RTE_CACHE_LINE_SIZE);
361 FOREACH_SUBDEV(sdev, i, dev)
362 rte_atomic64_init(&txq->refcnt[i]);
363 txq->qid = tx_queue_id;
364 txq->socket_id = socket_id;
365 txq->info.conf = *tx_conf;
366 txq->info.nb_desc = nb_tx_desc;
367 txq->priv = PRIV(dev);
368 dev->data->tx_queues[tx_queue_id] = txq;
369 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
370 ret = rte_eth_tx_queue_setup(PORT_ID(sdev),
372 nb_tx_desc, socket_id,
375 ERROR("TX queue setup failed for sub_device %d", i);
/* Error path: drop the partially set up fail-safe queue. */
381 fs_tx_queue_release(txq);
/*
 * Release every RX and TX queue of the fail-safe port.
 *
 * Each queue slot is passed to the matching release function and cleared;
 * the RX and TX queue counts are then reset to 0.
 */
386 fs_dev_free_queues(struct rte_eth_dev *dev)
390 for (i = 0; i < dev->data->nb_rx_queues; i++) {
391 fs_rx_queue_release(dev->data->rx_queues[i]);
392 dev->data->rx_queues[i] = NULL;
394 dev->data->nb_rx_queues = 0;
395 for (i = 0; i < dev->data->nb_tx_queues; i++) {
396 fs_tx_queue_release(dev->data->tx_queues[i]);
397 dev->data->tx_queues[i] = NULL;
399 dev->data->nb_tx_queues = 0;
/*
 * promiscuous_enable operation: enable promiscuous mode on every sub-device
 * selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 */
403 fs_promiscuous_enable(struct rte_eth_dev *dev)
405 struct sub_device *sdev;
408 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
409 rte_eth_promiscuous_enable(PORT_ID(sdev));
/*
 * promiscuous_disable operation: disable promiscuous mode on every sub-device
 * selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 */
413 fs_promiscuous_disable(struct rte_eth_dev *dev)
415 struct sub_device *sdev;
418 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
419 rte_eth_promiscuous_disable(PORT_ID(sdev));
/*
 * allmulticast_enable operation: enable all-multicast reception on every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 */
423 fs_allmulticast_enable(struct rte_eth_dev *dev)
425 struct sub_device *sdev;
428 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
429 rte_eth_allmulticast_enable(PORT_ID(sdev));
/*
 * allmulticast_disable operation: disable all-multicast reception on every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 */
433 fs_allmulticast_disable(struct rte_eth_dev *dev)
435 struct sub_device *sdev;
438 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
439 rte_eth_allmulticast_disable(PORT_ID(sdev));
443 fs_link_update(struct rte_eth_dev *dev,
444 int wait_to_complete)
446 struct sub_device *sdev;
450 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
451 DEBUG("Calling link_update on sub_device %d", i);
452 ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
453 if (ret && ret != -1) {
454 ERROR("Link update failed for sub_device %d with error %d",
459 if (TX_SUBDEV(dev)) {
460 struct rte_eth_link *l1;
461 struct rte_eth_link *l2;
463 l1 = &dev->data->dev_link;
464 l2 = Ð(TX_SUBDEV(dev))->data->dev_link;
465 if (memcmp(l1, l2, sizeof(*l1))) {
/*
 * stats_get operation.
 *
 * The output starts as a copy of the fail-safe stats accumulator. For every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE), fresh
 * statistics are fetched into its stats snapshot, the snapshot timestamp is
 * set to the current TSC value and the snapshot is added to the output with
 * failsafe_stats_increment().
 *
 * NOTE(review): the condition guarding the ERROR() and what happens after a
 * failed rte_eth_stats_get() are on lines missing from this listing.
 */
474 fs_stats_get(struct rte_eth_dev *dev,
475 struct rte_eth_stats *stats)
477 struct sub_device *sdev;
481 rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
482 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
483 struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
484 uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
486 ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
488 ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
493 *timestamp = rte_rdtsc();
494 failsafe_stats_increment(stats, snapshot);
500 fs_stats_reset(struct rte_eth_dev *dev)
502 struct sub_device *sdev;
505 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
506 rte_eth_stats_reset(PORT_ID(sdev));
507 memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
509 memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
/**
 * Fail-safe dev_infos_get rules:
 *
 * No probed sub_device:
 *   Numeric limits:
 *      Use the maximum possible values for any field, so as not
 *      to impede any further configuration effort.
 *   Capabilities:
 *      Limit capabilities to those that are understood by the
 *      fail-safe PMD. This understanding stems from the fail-safe
 *      being capable of verifying that the related capability is
 *      expressed within the device configuration (struct rte_eth_conf).
 *
 * At least one probed sub_device:
 *   Numeric limits:
 *      Use the values from the active probed sub_device.
 *      The rationale here is that if any sub_device is less capable
 *      (for example concerning the number of queues) than the active
 *      sub_device, then its subsequent configuration will fail.
 *      It is impossible to foresee this failure when the failing sub_device
 *      is supposed to be plugged-in later on, so the configuration process
 *      is the single point of failure and error reporting.
 *   Capabilities:
 *      Use a logical AND of RX capabilities among
 *      all sub_devices and the default capabilities.
 *      Use a logical AND of TX capabilities among
 *      the active probed sub_device and the default capabilities.
 */
/*
 * dev_infos_get operation.
 *
 * Two paths fill PRIV(dev)->infos before it is copied to the caller:
 *  - "No probed device": default_infos is copied as is;
 *  - otherwise: the RX offload mask starts from default_infos and is ANDed
 *    with the rx_offload_capa reported by every sub-device selected by
 *    FOREACH_SUBDEV_STATE(..., DEV_PROBED). The information of the TX
 *    sub-device is then fetched, its rx_offload_capa is replaced by that
 *    mask, and its TX offloads and RSS flow types are ANDed with the
 *    defaults.
 *
 * PRIV(dev)->infos doubles as scratch space inside the loop, which is why
 * the TX sub-device is queried again after it.
 *
 * NOTE(review): the test selecting between the two paths is on a line
 * missing from this listing - presumably a NULL check of TX_SUBDEV(dev).
 */
542 fs_dev_infos_get(struct rte_eth_dev *dev,
543 struct rte_eth_dev_info *infos)
545 struct sub_device *sdev;
548 sdev = TX_SUBDEV(dev);
550 DEBUG("No probed device, using default infos");
551 rte_memcpy(&PRIV(dev)->infos, &default_infos,
552 sizeof(default_infos));
554 uint32_t rx_offload_capa;
556 rx_offload_capa = default_infos.rx_offload_capa;
557 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
558 rte_eth_dev_info_get(PORT_ID(sdev),
560 rx_offload_capa &= PRIV(dev)->infos.rx_offload_capa;
/* The loop reused sdev as its cursor: fetch the TX sub-device again. */
562 sdev = TX_SUBDEV(dev);
563 rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
564 PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
565 PRIV(dev)->infos.tx_offload_capa &=
566 default_infos.tx_offload_capa;
567 PRIV(dev)->infos.flow_type_rss_offloads &=
568 default_infos.flow_type_rss_offloads;
570 rte_memcpy(infos, &PRIV(dev)->infos, sizeof(*infos));
/*
 * dev_supported_ptypes_get operation.
 *
 * Delegates to the dev_supported_ptypes_get operation of the TX sub-device
 * and returns its result unchanged; no merge across sub-devices is done.
 *
 * NOTE(review): the handling of a missing TX sub-device, the assignment of
 * edev and the value returned when the sub-device lacks the operation are on
 * lines missing from this listing.
 */
573 static const uint32_t *
574 fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
576 struct sub_device *sdev;
577 struct rte_eth_dev *edev;
579 sdev = TX_SUBDEV(dev);
583 /* ENOTSUP: counts as no supported ptypes */
584 if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
587 * The API does not permit to do a clean AND of all ptypes,
588 * It is also incomplete by design and we do not really care
589 * to have a best possible value in this context.
590 * We just return the ptypes of the device of highest
591 * priority, usually the PREFERRED device.
593 return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
/*
 * mtu_set operation: apply `mtu` through rte_eth_dev_set_mtu() on every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A failure is logged with the sub-device index and the error code.
 * NOTE(review): the condition guarding the ERROR() and the return statements
 * are missing from this listing - presumably the first error is returned.
 */
597 fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
599 struct sub_device *sdev;
603 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
604 DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
605 ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
607 ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
/*
 * vlan_filter_set operation: forward (vlan_id, on) through
 * rte_eth_dev_vlan_filter() to every sub-device selected by
 * FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A failure is logged with the sub-device index and the error code.
 * NOTE(review): the condition guarding the ERROR() and the return statements
 * are missing from this listing - presumably the first error is returned.
 */
616 fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
618 struct sub_device *sdev;
622 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
623 DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
624 ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
626 ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
627 " with error %d", i, ret);
/*
 * flow_ctrl_get operation: read the flow control configuration from the TX
 * sub-device only, by calling its flow_ctrl_get operation directly.
 *
 * NOTE(review): the handling of a missing TX sub-device and the value
 * returned when the sub-device lacks the operation are on lines missing from
 * this listing.
 */
635 fs_flow_ctrl_get(struct rte_eth_dev *dev,
636 struct rte_eth_fc_conf *fc_conf)
638 struct sub_device *sdev;
640 sdev = TX_SUBDEV(dev);
643 if (SUBOPS(sdev, flow_ctrl_get) == NULL)
645 return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
/*
 * flow_ctrl_set operation: apply fc_conf through rte_eth_dev_flow_ctrl_set()
 * on every sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A failure is logged with the sub-device index and the error code.
 * NOTE(review): the condition guarding the ERROR() and the return statements
 * are missing from this listing - presumably the first error is returned.
 */
649 fs_flow_ctrl_set(struct rte_eth_dev *dev,
650 struct rte_eth_fc_conf *fc_conf)
652 struct sub_device *sdev;
656 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
657 DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
658 ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
660 ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
661 " with error %d", i, ret);
669 fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
671 struct sub_device *sdev;
674 /* No check: already done within the rte_eth_dev_mac_addr_remove
675 * call for the fail-safe device.
677 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
678 rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
679 &dev->data->mac_addrs[index]);
680 PRIV(dev)->mac_addr_pool[index] = 0;
684 fs_mac_addr_add(struct rte_eth_dev *dev,
685 struct ether_addr *mac_addr,
689 struct sub_device *sdev;
693 RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
694 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
695 ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
697 ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
698 PRIu8 " with error %d", i, ret);
702 if (index >= PRIV(dev)->nb_mac_addr) {
703 DEBUG("Growing mac_addrs array");
704 PRIV(dev)->nb_mac_addr = index;
706 PRIV(dev)->mac_addr_pool[index] = vmdq;
/*
 * mac_addr_set operation: set mac_addr as the default MAC address of every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * The return value of rte_eth_dev_default_mac_addr_set() is not used on the
 * visible lines.
 */
711 fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
713 struct sub_device *sdev;
716 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
717 rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
/*
 * filter_ctrl operation.
 *
 * A RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET request is answered locally:
 * the address of the fail-safe fs_flow_ops table is stored through arg.
 * Any other request is forwarded through rte_eth_dev_filter_ctrl() to every
 * sub-device selected by FOREACH_SUBDEV_STATE(..., DEV_ACTIVE).
 *
 * A forwarding failure is logged with the sub-device index and error code.
 * NOTE(review): the `arg` parameter line, the condition guarding the ERROR()
 * and the return statements are missing from this listing.
 */
721 fs_filter_ctrl(struct rte_eth_dev *dev,
722 enum rte_filter_type type,
723 enum rte_filter_op op,
726 struct sub_device *sdev;
730 if (type == RTE_ETH_FILTER_GENERIC &&
731 op == RTE_ETH_FILTER_GET) {
732 *(const void **)arg = &fs_flow_ops;
735 FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
736 DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
737 ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
739 ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
740 " with error %d", i, ret);
/*
 * Ethdev operations table of the fail-safe PMD.
 *
 * Binds each eth_dev_ops callback to the fs_* implementation of this file.
 * The table has external linkage (not static), so it is meant to be
 * referenced from other translation units.
 * Callbacks that are not listed are left at their zero-initialised (NULL)
 * value.
 */
747 const struct eth_dev_ops failsafe_ops = {
748 .dev_configure = fs_dev_configure,
749 .dev_start = fs_dev_start,
750 .dev_stop = fs_dev_stop,
751 .dev_set_link_down = fs_dev_set_link_down,
752 .dev_set_link_up = fs_dev_set_link_up,
753 .dev_close = fs_dev_close,
754 .promiscuous_enable = fs_promiscuous_enable,
755 .promiscuous_disable = fs_promiscuous_disable,
756 .allmulticast_enable = fs_allmulticast_enable,
757 .allmulticast_disable = fs_allmulticast_disable,
758 .link_update = fs_link_update,
759 .stats_get = fs_stats_get,
760 .stats_reset = fs_stats_reset,
761 .dev_infos_get = fs_dev_infos_get,
762 .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
763 .mtu_set = fs_mtu_set,
764 .vlan_filter_set = fs_vlan_filter_set,
765 .rx_queue_setup = fs_rx_queue_setup,
766 .tx_queue_setup = fs_tx_queue_setup,
767 .rx_queue_release = fs_rx_queue_release,
768 .tx_queue_release = fs_tx_queue_release,
769 .flow_ctrl_get = fs_flow_ctrl_get,
770 .flow_ctrl_set = fs_flow_ctrl_set,
771 .mac_addr_remove = fs_mac_addr_remove,
772 .mac_addr_add = fs_mac_addr_add,
773 .mac_addr_set = fs_mac_addr_set,
774 .filter_ctrl = fs_filter_ctrl,