New upstream version 17.11.4
[deb_dpdk.git] / drivers / net / tap / tap_flow.c
index cf1c8a2..f10bbf6 100644 (file)
@@ -82,6 +82,8 @@ enum {
 };
 #endif
 
+#define ISOLATE_HANDLE 1
+
 struct rte_flow {
        LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
        struct rte_flow *remote_flow; /* associated remote flow */
@@ -98,6 +100,7 @@ struct convert_data {
 struct remote_rule {
        struct rte_flow_attr attr;
        struct rte_flow_item items[2];
+       struct rte_flow_action actions[2];
        int mirred;
 };
 
@@ -126,11 +129,17 @@ tap_flow_destroy(struct rte_eth_dev *dev,
                 struct rte_flow *flow,
                 struct rte_flow_error *error);
 
+static int
+tap_flow_isolate(struct rte_eth_dev *dev,
+                int set,
+                struct rte_flow_error *error);
+
 static const struct rte_flow_ops tap_flow_ops = {
        .validate = tap_flow_validate,
        .create = tap_flow_create,
        .destroy = tap_flow_destroy,
        .flush = tap_flow_flush,
+       .isolate = tap_flow_isolate,
 };
 
 /* Static initializer for items. */
@@ -258,6 +267,47 @@ static const struct tap_flow_items tap_flow_items[] = {
        },
 };
 
+/*
+ *                TC rules, by growing priority
+ *
+ *        Remote netdevice                  Tap netdevice
+ * +-------------+-------------+  +-------------+-------------+
+ * |   Ingress   |   Egress    |  |   Ingress   |   Egress    |
+ * |-------------|-------------|  |-------------|-------------|
+ * |             |  \       /  |  |             |  REMOTE TX  | prio 1
+ * |             |   \     /   |  |             |   \     /   | prio 2
+ * |  EXPLICIT   |    \   /    |  |  EXPLICIT   |    \   /    |   .
+ * |             |     \ /     |  |             |     \ /     |   .
+ * |    RULES    |      X      |  |    RULES    |      X      |   .
+ * |      .      |     / \     |  |      .      |     / \     |   .
+ * |      .      |    /   \    |  |      .      |    /   \    |   .
+ * |      .      |   /     \   |  |      .      |   /     \   |   .
+ * |      .      |  /       \  |  |      .      |  /       \  |   .
+ *
+ *      ....           ....           ....           ....
+ *
+ * |      .      |  \       /  |  |      .      |  \       /  |   .
+ * |      .      |   \     /   |  |      .      |   \     /   |   .
+ * |             |    \   /    |  |             |    \   /    |
+ * |  LOCAL_MAC  |     \ /     |  |    \   /    |     \ /     | last prio - 5
+ * |   PROMISC   |      X      |  |     \ /     |      X      | last prio - 4
+ * |   ALLMULTI  |     / \     |  |      X      |     / \     | last prio - 3
+ * |  BROADCAST  |    /   \    |  |     / \     |    /   \    | last prio - 2
+ * | BROADCASTV6 |   /     \   |  |    /   \    |   /     \   | last prio - 1
+ * |     xx      |  /       \  |  |   ISOLATE   |  /       \  | last prio
+ * +-------------+-------------+  +-------------+-------------+
+ *
+ * The implicit flow rules are stored in a list whose last two entries are
+ * always the REMOTE_TX and ISOLATE rules, in that order, e.g.:
+ *
+ * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL
+ *
+ * That enables tap_flow_isolate() to remove implicit rules by popping the list
+ * head and removing it as long as it applies to the remote netdevice. The
+ * implicit rule for TX redirection is not removed, as isolation concerns only
+ * incoming traffic.
+ */
+
 static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
        [TAP_REMOTE_LOCAL_MAC] = {
                .attr = {
@@ -364,6 +414,19 @@ static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
                },
                .mirred = TCA_EGRESS_MIRROR,
        },
+       [TAP_ISOLATE] = {
+               .attr = {
+                       .group = MAX_GROUP,
+                       .priority = PRIORITY_MASK - TAP_ISOLATE,
+                       .ingress = 1,
+               },
+               .items[0] = {
+                       .type = RTE_FLOW_ITEM_TYPE_VOID,
+               },
+               .items[1] = {
+                       .type = RTE_FLOW_ITEM_TYPE_END,
+               },
+       },
 };
 
 /**
@@ -401,10 +464,7 @@ tap_flow_create_eth(const struct rte_flow_item *item, void *data)
        if (!flow)
                return 0;
        msg = &flow->msg;
-       if (spec->type & mask->type)
-               msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info,
-                                           (spec->type & mask->type));
-       if (!is_zero_ether_addr(&spec->dst)) {
+       if (!is_zero_ether_addr(&mask->dst)) {
                nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN,
                           &spec->dst.addr_bytes);
                nlattr_add(&msg->nh,
@@ -508,17 +568,15 @@ tap_flow_create_ipv4(const struct rte_flow_item *item, void *data)
        msg = &flow->msg;
        if (!info->eth_type)
                info->eth_type = htons(ETH_P_IP);
-       if (!info->vlan)
-               msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_IP));
        if (!spec)
                return 0;
-       if (spec->hdr.dst_addr) {
+       if (mask->hdr.dst_addr) {
                nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
                             spec->hdr.dst_addr);
                nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
                             mask->hdr.dst_addr);
        }
-       if (spec->hdr.src_addr) {
+       if (mask->hdr.src_addr) {
                nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
                             spec->hdr.src_addr);
                nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
@@ -566,17 +624,15 @@ tap_flow_create_ipv6(const struct rte_flow_item *item, void *data)
        msg = &flow->msg;
        if (!info->eth_type)
                info->eth_type = htons(ETH_P_IPV6);
-       if (!info->vlan)
-               msg->t.tcm_info = TC_H_MAKE(msg->t.tcm_info, htons(ETH_P_IPV6));
        if (!spec)
                return 0;
-       if (memcmp(spec->hdr.dst_addr, empty_addr, 16)) {
+       if (memcmp(mask->hdr.dst_addr, empty_addr, 16)) {
                nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
                           sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
                nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
                           sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
        }
-       if (memcmp(spec->hdr.src_addr, empty_addr, 16)) {
+       if (memcmp(mask->hdr.src_addr, empty_addr, 16)) {
                nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
                           sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
                nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
@@ -624,10 +680,10 @@ tap_flow_create_udp(const struct rte_flow_item *item, void *data)
        nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
        if (!spec)
                return 0;
-       if (spec->hdr.dst_port & mask->hdr.dst_port)
+       if (mask->hdr.dst_port)
                nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
                             spec->hdr.dst_port);
-       if (spec->hdr.src_port & mask->hdr.src_port)
+       if (mask->hdr.src_port)
                nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
                             spec->hdr.src_port);
        return 0;
@@ -670,10 +726,10 @@ tap_flow_create_tcp(const struct rte_flow_item *item, void *data)
        nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
        if (!spec)
                return 0;
-       if (spec->hdr.dst_port & mask->hdr.dst_port)
+       if (mask->hdr.dst_port)
                nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
                             spec->hdr.dst_port);
-       if (spec->hdr.src_port & mask->hdr.src_port)
+       if (mask->hdr.src_port)
                nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
                             spec->hdr.src_port);
        return 0;
@@ -978,16 +1034,13 @@ priv_flow_process(struct pmd_internals *pmd,
                if (err)
                        goto exit_item_not_supported;
                if (flow && cur_item->convert) {
-                       if (!pmd->flower_vlan_support &&
-                           cur_item->convert == tap_flow_create_vlan)
-                               goto exit_item_not_supported;
                        err = cur_item->convert(items, &data);
                        if (err)
                                goto exit_item_not_supported;
                }
        }
        if (flow) {
-               if (pmd->flower_vlan_support && data.vlan) {
+               if (data.vlan) {
                        nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
                                     htons(ETH_P_8021Q));
                        nlattr_add16(&flow->msg.nh,
@@ -1036,13 +1089,29 @@ priv_flow_process(struct pmd_internals *pmd,
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
+
                        if (action)
                                goto exit_action_not_supported;
                        action = 1;
-                       if (!queue || (queue->index >= pmd->nb_queues))
+                       if (!queue ||
+                           (queue->index > pmd->dev->data->nb_rx_queues - 1))
                                goto exit_action_not_supported;
                        if (flow)
                                err = add_action_skbedit(flow, queue->index);
+               } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+                       /* Fake RSS support. */
+                       const struct rte_flow_action_rss *rss =
+                               (const struct rte_flow_action_rss *)
+                               actions->conf;
+
+                       if (action)
+                               goto exit_action_not_supported;
+                       action = 1;
+                       if (!rss || rss->num < 1 ||
+                           (rss->queue[0] > pmd->dev->data->nb_rx_queues - 1))
+                               goto exit_action_not_supported;
+                       if (flow)
+                               err = add_action_skbedit(flow, rss->queue[0]);
                } else {
                        goto exit_action_not_supported;
                }
@@ -1175,7 +1244,8 @@ tap_flow_create(struct rte_eth_dev *dev,
                        "Kernel refused TC filter rule creation (%d): %s\n",
                        errno, strerror(errno));
                rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
-                                  NULL, "overlapping rules");
+                                  NULL,
+                                  "overlapping rules or Kernel too old for flower support");
                goto fail;
        }
        LIST_INSERT_HEAD(&pmd->flows, flow, next);
@@ -1220,7 +1290,8 @@ tap_flow_create(struct rte_eth_dev *dev,
                                errno, strerror(errno));
                        rte_flow_error_set(
                                error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-                               NULL, "overlapping rules");
+                               NULL,
+                               "overlapping rules or Kernel too old for flower support");
                        goto fail;
                }
                flow->remote_flow = remote_flow;
@@ -1324,6 +1395,78 @@ tap_flow_destroy(struct rte_eth_dev *dev,
        return tap_flow_destroy_pmd(pmd, flow, error);
 }
 
+/**
+ * Enable (set != 0) or disable flow isolation, updating TC rules right away
+ * if rxq[0].fd is set. On failure, pmd->flow_isolate is reset to 0.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+static int
+tap_flow_isolate(struct rte_eth_dev *dev,
+                int set,
+                struct rte_flow_error *error __rte_unused)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+
+       if (set)
+               pmd->flow_isolate = 1;
+       else
+               pmd->flow_isolate = 0;
+       /*
+        * If the netdevice is there, set up the appropriate flow rules now.
+        * Otherwise this is done when bringing up the netdevice (tun_alloc).
+        */
+       if (!pmd->rxq[0].fd)
+               return 0;
+       if (set) {
+               struct rte_flow *flow;
+
+               while (1) {
+                       flow = LIST_FIRST(&pmd->implicit_flows);
+                       if (!flow)
+                               break;
+                       /*
+                        * Pop implicit rules from the list head until one
+                        * installed on the tap itself (pmd->if_index) shows
+                        * up: the local TX redirection and ISOLATE rules stay.
+                        */
+                       if (flow->msg.t.tcm_ifindex == pmd->if_index)
+                               break;
+                       if (tap_flow_destroy_pmd(pmd, flow, NULL) < 0)
+                               goto error;
+               }
+               /* Refresh the ISOLATE rule to match pmd->flow_isolate. */
+               if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
+                       goto error;
+       } else {
+               /* Refresh the ISOLATE rule to match pmd->flow_isolate. */
+               if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
+                       goto error;
+               if (!pmd->remote_if_index)
+                       return 0; /* presumably no remote netdevice: done */
+               if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
+                       goto error;
+               if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
+                       goto error;
+               if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
+                       goto error;
+               if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
+                       goto error;
+               if (dev->data->promiscuous &&
+                   tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
+                       goto error;
+               if (dev->data->all_multicast &&
+                   tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
+                       goto error;
+       }
+       return 0;
+error:
+       pmd->flow_isolate = 0; /* reset on any failure, whatever set was */
+       return rte_flow_error_set(
+               error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+               "TC rule creation failed");
+}
+
+
 /**
  * Destroy all flows.
  *
@@ -1358,6 +1501,13 @@ tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 int tap_flow_implicit_create(struct pmd_internals *pmd,
                             enum implicit_rule_index idx)
 {
+       uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+       struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
+       struct rte_flow_action isolate_actions[2] = {
+               [1] = {
+                       .type = RTE_FLOW_ACTION_TYPE_END,
+               },
+       };
        struct rte_flow_item *items = implicit_rte_flows[idx].items;
        struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
        struct rte_flow_item_eth eth_local = { .type = 0 };
@@ -1378,12 +1528,20 @@ int tap_flow_implicit_create(struct pmd_internals *pmd,
 
        remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
        if (!remote_flow) {
-               RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow");
+               RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow\n");
                goto fail;
        }
        msg = &remote_flow->msg;
        if (idx == TAP_REMOTE_TX) {
                if_index = pmd->if_index;
+       } else if (idx == TAP_ISOLATE) {
+               if_index = pmd->if_index;
+               /* Don't be exclusive for this rule, it can be changed later. */
+               flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+               isolate_actions[0].type = pmd->flow_isolate ?
+                       RTE_FLOW_ACTION_TYPE_DROP :
+                       RTE_FLOW_ACTION_TYPE_PASSTHRU;
+               actions = isolate_actions;
        } else if (idx == TAP_REMOTE_LOCAL_MAC) {
                /*
                 * eth addr couldn't be set in implicit_rte_flows[] as it is not
@@ -1392,18 +1550,25 @@ int tap_flow_implicit_create(struct pmd_internals *pmd,
                memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
                items = items_local;
        }
-       tc_init_msg(msg, if_index, RTM_NEWTFILTER,
-                   NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+       tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
        msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
-       tap_flow_set_handle(remote_flow);
-       if (priv_flow_process(pmd, attr, items, NULL, NULL,
+       /*
+        * The ISOLATE rule is always present and must have a static handle, as
+        * the action is changed whether the feature is enabled (DROP) or
+        * disabled (PASSTHRU).
+        */
+       if (idx == TAP_ISOLATE)
+               remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
+       else
+               tap_flow_set_handle(remote_flow);
+       if (priv_flow_process(pmd, attr, items, actions, NULL,
                              remote_flow, implicit_rte_flows[idx].mirred)) {
                RTE_LOG(ERR, PMD, "rte flow rule validation failed\n");
                goto fail;
        }
        err = nl_send(pmd->nlsk_fd, &msg->nh);
        if (err < 0) {
-               RTE_LOG(ERR, PMD, "Failure sending nl request");
+               RTE_LOG(ERR, PMD, "Failure sending nl request\n");
                goto fail;
        }
        err = nl_recv_ack(pmd->nlsk_fd);
@@ -1488,10 +1653,6 @@ tap_dev_filter_ctrl(struct rte_eth_dev *dev,
                    enum rte_filter_op filter_op,
                    void *arg)
 {
-       struct pmd_internals *pmd = dev->data->dev_private;
-
-       if (!pmd->flower_support)
-               return -ENOTSUP;
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET)
@@ -1499,7 +1660,7 @@ tap_dev_filter_ctrl(struct rte_eth_dev *dev,
                *(const void **)arg = &tap_flow_ops;
                return 0;
        default:
-               RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported",
+               RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported\n",
                        (void *)dev, filter_type);
        }
        return -EINVAL;