};
#endif
+#define ISOLATE_HANDLE 1
+
struct rte_flow {
LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
struct rte_flow *remote_flow; /* associated remote flow */
struct remote_rule {
struct rte_flow_attr attr;
struct rte_flow_item items[2];
+ struct rte_flow_action actions[2];
int mirred;
};
struct rte_flow *flow,
struct rte_flow_error *error);
+static int
+tap_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error);
+
static const struct rte_flow_ops tap_flow_ops = {
.validate = tap_flow_validate,
.create = tap_flow_create,
.destroy = tap_flow_destroy,
.flush = tap_flow_flush,
+ .isolate = tap_flow_isolate,
};
/* Static initializer for items. */
},
};
+/*
+ * TC rules, by growing priority
+ *
+ * Remote netdevice Tap netdevice
+ * +-------------+-------------+ +-------------+-------------+
+ * | Ingress | Egress | | Ingress | Egress |
+ * |-------------|-------------| |-------------|-------------|
+ * | | \ / | | | REMOTE TX | prio 1
+ * | | \ / | | | \ / | prio 2
+ * | EXPLICIT | \ / | | EXPLICIT | \ / | .
+ * | | \ / | | | \ / | .
+ * | RULES | X | | RULES | X | .
+ * | . | / \ | | . | / \ | .
+ * | . | / \ | | . | / \ | .
+ * | . | / \ | | . | / \ | .
+ * | . | / \ | | . | / \ | .
+ *
+ * .... .... .... ....
+ *
+ * | . | \ / | | . | \ / | .
+ * | . | \ / | | . | \ / | .
+ * | | \ / | | | \ / |
+ * | LOCAL_MAC | \ / | | \ / | \ / | last prio - 5
+ * | PROMISC | X | | \ / | X | last prio - 4
+ * | ALLMULTI | / \ | | X | / \ | last prio - 3
+ * | BROADCAST | / \ | | / \ | / \ | last prio - 2
+ * | BROADCASTV6 | / \ | | / \ | / \ | last prio - 1
+ * | xx | / \ | | ISOLATE | / \ | last prio
+ * +-------------+-------------+ +-------------+-------------+
+ *
+ * The implicit flow rules are stored in a list in with mandatorily the last two
+ * being the ISOLATE and REMOTE_TX rules. e.g.:
+ *
+ * LOCAL_MAC -> BROADCAST -> BROADCASTV6 -> REMOTE_TX -> ISOLATE -> NULL
+ *
+ * That enables tap_flow_isolate() to remove implicit rules by popping the list
+ * head and remove it as long as it applies on the remote netdevice. The
+ * implicit rule for TX redirection is not removed, as isolate concerns only
+ * incoming traffic.
+ */
+
static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = {
[TAP_REMOTE_LOCAL_MAC] = {
.attr = {
},
.mirred = TCA_EGRESS_MIRROR,
},
+ [TAP_ISOLATE] = {
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_ISOLATE,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_VOID,
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ },
};
/**
if (!flow)
return 0;
msg = &flow->msg;
- if (!is_zero_ether_addr(&spec->dst)) {
+ if (!is_zero_ether_addr(&mask->dst)) {
nlattr_add(&msg->nh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN,
&spec->dst.addr_bytes);
nlattr_add(&msg->nh,
info->eth_type = htons(ETH_P_IP);
if (!spec)
return 0;
- if (spec->hdr.dst_addr) {
+ if (mask->hdr.dst_addr) {
nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST,
spec->hdr.dst_addr);
nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_DST_MASK,
mask->hdr.dst_addr);
}
- if (spec->hdr.src_addr) {
+ if (mask->hdr.src_addr) {
nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC,
spec->hdr.src_addr);
nlattr_add32(&msg->nh, TCA_FLOWER_KEY_IPV4_SRC_MASK,
info->eth_type = htons(ETH_P_IPV6);
if (!spec)
return 0;
- if (memcmp(spec->hdr.dst_addr, empty_addr, 16)) {
+ if (memcmp(mask->hdr.dst_addr, empty_addr, 16)) {
nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST,
sizeof(spec->hdr.dst_addr), &spec->hdr.dst_addr);
nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_DST_MASK,
sizeof(mask->hdr.dst_addr), &mask->hdr.dst_addr);
}
- if (memcmp(spec->hdr.src_addr, empty_addr, 16)) {
+ if (memcmp(mask->hdr.src_addr, empty_addr, 16)) {
nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC,
sizeof(spec->hdr.src_addr), &spec->hdr.src_addr);
nlattr_add(&msg->nh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_UDP);
if (!spec)
return 0;
- if (spec->hdr.dst_port & mask->hdr.dst_port)
+ if (mask->hdr.dst_port)
nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_DST,
spec->hdr.dst_port);
- if (spec->hdr.src_port & mask->hdr.src_port)
+ if (mask->hdr.src_port)
nlattr_add16(&msg->nh, TCA_FLOWER_KEY_UDP_SRC,
spec->hdr.src_port);
return 0;
nlattr_add8(&msg->nh, TCA_FLOWER_KEY_IP_PROTO, IPPROTO_TCP);
if (!spec)
return 0;
- if (spec->hdr.dst_port & mask->hdr.dst_port)
+ if (mask->hdr.dst_port)
nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_DST,
spec->hdr.dst_port);
- if (spec->hdr.src_port & mask->hdr.src_port)
+ if (mask->hdr.src_port)
nlattr_add16(&msg->nh, TCA_FLOWER_KEY_TCP_SRC,
spec->hdr.src_port);
return 0;
if (err)
goto exit_item_not_supported;
if (flow && cur_item->convert) {
- if (!pmd->flower_vlan_support &&
- cur_item->convert == tap_flow_create_vlan)
- goto exit_item_not_supported;
err = cur_item->convert(items, &data);
if (err)
goto exit_item_not_supported;
}
}
if (flow) {
- if (pmd->flower_vlan_support && data.vlan) {
+ if (data.vlan) {
nlattr_add16(&flow->msg.nh, TCA_FLOWER_KEY_ETH_TYPE,
htons(ETH_P_8021Q));
nlattr_add16(&flow->msg.nh,
const struct rte_flow_action_queue *queue =
(const struct rte_flow_action_queue *)
actions->conf;
+
if (action)
goto exit_action_not_supported;
action = 1;
- if (!queue || (queue->index >= pmd->nb_queues))
+ if (!queue ||
+ (queue->index > pmd->dev->data->nb_rx_queues - 1))
goto exit_action_not_supported;
if (flow)
err = add_action_skbedit(flow, queue->index);
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+ /* Fake RSS support. */
+ const struct rte_flow_action_rss *rss =
+ (const struct rte_flow_action_rss *)
+ actions->conf;
+
+ if (action)
+ goto exit_action_not_supported;
+ action = 1;
+ if (!rss || rss->num < 1 ||
+ (rss->queue[0] > pmd->dev->data->nb_rx_queues - 1))
+ goto exit_action_not_supported;
+ if (flow)
+ err = add_action_skbedit(flow, rss->queue[0]);
} else {
goto exit_action_not_supported;
}
"Kernel refused TC filter rule creation (%d): %s\n",
errno, strerror(errno));
rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "overlapping rules");
+ NULL,
+ "overlapping rules or Kernel too old for flower support");
goto fail;
}
LIST_INSERT_HEAD(&pmd->flows, flow, next);
errno, strerror(errno));
rte_flow_error_set(
error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "overlapping rules");
+ NULL,
+ "overlapping rules or Kernel too old for flower support");
goto fail;
}
flow->remote_flow = remote_flow;
return tap_flow_destroy_pmd(pmd, flow, error);
}
+/**
+ * Enable/disable flow isolation.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+static int
+tap_flow_isolate(struct rte_eth_dev *dev,
+ int set,
+ struct rte_flow_error *error __rte_unused)
+{
+ struct pmd_internals *pmd = dev->data->dev_private;
+
+ if (set)
+ pmd->flow_isolate = 1;
+ else
+ pmd->flow_isolate = 0;
+ /*
+ * If netdevice is there, setup appropriate flow rules immediately.
+ * Otherwise it will be set when bringing up the netdevice (tun_alloc).
+ */
+ if (!pmd->rxq[0].fd)
+ return 0;
+ if (set) {
+ struct rte_flow *flow;
+
+ while (1) {
+ flow = LIST_FIRST(&pmd->implicit_flows);
+ if (!flow)
+ break;
+ /*
+ * Remove all implicit rules on the remote.
+ * Keep the local rule to redirect packets on TX.
+ * Keep also the last implicit local rule: ISOLATE.
+ */
+ if (flow->msg.t.tcm_ifindex == pmd->if_index)
+ break;
+ if (tap_flow_destroy_pmd(pmd, flow, NULL) < 0)
+ goto error;
+ }
+ /* Switch the TC rule according to pmd->flow_isolate */
+ if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
+ goto error;
+ } else {
+ /* Switch the TC rule according to pmd->flow_isolate */
+ if (tap_flow_implicit_create(pmd, TAP_ISOLATE) == -1)
+ goto error;
+ if (!pmd->remote_if_index)
+ return 0;
+ if (tap_flow_implicit_create(pmd, TAP_REMOTE_TX) < 0)
+ goto error;
+ if (tap_flow_implicit_create(pmd, TAP_REMOTE_LOCAL_MAC) < 0)
+ goto error;
+ if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCAST) < 0)
+ goto error;
+ if (tap_flow_implicit_create(pmd, TAP_REMOTE_BROADCASTV6) < 0)
+ goto error;
+ if (dev->data->promiscuous &&
+ tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC) < 0)
+ goto error;
+ if (dev->data->all_multicast &&
+ tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI) < 0)
+ goto error;
+ }
+ return 0;
+error:
+ pmd->flow_isolate = 0;
+ return rte_flow_error_set(
+ error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "TC rule creation failed");
+}
+
/**
* Destroy all flows.
*
int tap_flow_implicit_create(struct pmd_internals *pmd,
enum implicit_rule_index idx)
{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct rte_flow_action *actions = implicit_rte_flows[idx].actions;
+ struct rte_flow_action isolate_actions[2] = {
+ [1] = {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
struct rte_flow_item *items = implicit_rte_flows[idx].items;
struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
struct rte_flow_item_eth eth_local = { .type = 0 };
remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
if (!remote_flow) {
- RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow");
+ RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow\n");
goto fail;
}
msg = &remote_flow->msg;
if (idx == TAP_REMOTE_TX) {
if_index = pmd->if_index;
+ } else if (idx == TAP_ISOLATE) {
+ if_index = pmd->if_index;
+ /* Don't be exclusive for this rule, it can be changed later. */
+ flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ isolate_actions[0].type = pmd->flow_isolate ?
+ RTE_FLOW_ACTION_TYPE_DROP :
+ RTE_FLOW_ACTION_TYPE_PASSTHRU;
+ actions = isolate_actions;
} else if (idx == TAP_REMOTE_LOCAL_MAC) {
/*
* eth addr couldn't be set in implicit_rte_flows[] as it is not
memcpy(ð_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
items = items_local;
}
- tc_init_msg(msg, if_index, RTM_NEWTFILTER,
- NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+ tc_init_msg(msg, if_index, RTM_NEWTFILTER, flags);
msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
- tap_flow_set_handle(remote_flow);
- if (priv_flow_process(pmd, attr, items, NULL, NULL,
+ /*
+ * The ISOLATE rule is always present and must have a static handle, as
+ * the action is changed whether the feature is enabled (DROP) or
+ * disabled (PASSTHRU).
+ */
+ if (idx == TAP_ISOLATE)
+ remote_flow->msg.t.tcm_handle = ISOLATE_HANDLE;
+ else
+ tap_flow_set_handle(remote_flow);
+ if (priv_flow_process(pmd, attr, items, actions, NULL,
remote_flow, implicit_rte_flows[idx].mirred)) {
RTE_LOG(ERR, PMD, "rte flow rule validation failed\n");
goto fail;
}
err = nl_send(pmd->nlsk_fd, &msg->nh);
if (err < 0) {
- RTE_LOG(ERR, PMD, "Failure sending nl request");
+ RTE_LOG(ERR, PMD, "Failure sending nl request\n");
goto fail;
}
err = nl_recv_ack(pmd->nlsk_fd);
enum rte_filter_op filter_op,
void *arg)
{
- struct pmd_internals *pmd = dev->data->dev_private;
-
- if (!pmd->flower_support)
- return -ENOTSUP;
switch (filter_type) {
case RTE_ETH_FILTER_GENERIC:
if (filter_op != RTE_ETH_FILTER_GET)
*(const void **)arg = &tap_flow_ops;
return 0;
default:
- RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported",
+ RTE_LOG(ERR, PMD, "%p: filter type (%d) not supported\n",
(void *)dev, filter_type);
}
return -EINVAL;