X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Ftap%2Ftap.c;h=33d6e3bc84a0d06012b73735d58863913d81ed18;hb=4cef6de59;hp=cbdbbe9e4e237b7218926b73e9a0cdb6bf609260;hpb=86f281a841b5ec67c6a440dfc691e4c42b883df9;p=vpp.git diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index cbdbbe9e4e2..33d6e3bc84a 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -18,15 +18,15 @@ #define _GNU_SOURCE #include #include +#include #include #include #include #include -#include -#include +#include +#include #include #include -#include #include #include @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -65,6 +66,10 @@ VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) = }; /* *INDENT-ON* */ +#define TUN_MAX_PACKET_BYTES 65355 +#define TUN_MIN_PACKET_BYTES 64 +#define TUN_DEFAULT_PACKET_BYTES 1500 + static u32 virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) @@ -74,24 +79,6 @@ virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, return 0; } -static int -open_netns_fd (char *netns) -{ - u8 *s = 0; - int fd; - - if (strncmp (netns, "pid:", 4) == 0) - s = format (0, "/proc/%u/ns/net%c", atoi (netns + 4), 0); - else if (netns[0] == '/') - s = format (0, "%s%c", netns, 0); - else - s = format (0, "/var/run/netns/%s%c", netns, 0); - - fd = open ((char *) s, O_RDONLY); - vec_free (s); - return fd; -} - #define TAP_MAX_INSTANCE 1024 static void @@ -111,8 +98,11 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif) virtio_vring_free_tx (vm, vif, TX_QUEUE (i)); /* *INDENT-ON* */ - _IOCTL (vif->tap_fds[0], TUNSETPERSIST, (void *) (uintptr_t) 0); - tap_log_dbg (vif, "TUNSETPERSIST: unset"); + if (vif->tap_fds) + { + _IOCTL (vif->tap_fds[0], TUNSETPERSIST, (void *) (uintptr_t) 0); + tap_log_dbg (vif, "TUNSETPERSIST: unset"); + } error: vec_foreach_index (i, vif->tap_fds) close (vif->tap_fds[i]); @@ -139,18 +129,19 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) tap_main_t *tm = &tap_main; vnet_sw_interface_t *sw; vnet_hw_interface_t *hw; - int i, j, num_vhost_queues; + int i, num_vhost_queues; int old_netns_fd = -1; struct ifreq ifr = {.ifr_flags = IFF_NO_PI | IFF_VNET_HDR }; struct ifreq get_ifr = {.ifr_flags = 0 }; size_t hdrsz; - struct vhost_memory *vhost_mem = 0; + vhost_memory_t *vhost_mem = 0; virtio_if_t *vif = 0; clib_error_t *err = 0; unsigned int tap_features; int tfd = -1, qfd = -1, vfd = -1, nfd = -1; char *host_if_name = 0; unsigned int offload = 0; + int sndbuf = 0; if (args->id != ~0) { @@ -179,11 +170,22 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) { vif->type = VIRTIO_IF_TYPE_TUN; ifr.ifr_flags |= IFF_TUN; + + /* + * From kernel 4.20, xdp support has been added in tun_sendmsg. + * If sndbuf == INT_MAX, vhost batches the packet and processes + * them using xdp data path for tun driver. It assumes packets + * are ethernet frames (It needs to be fixed). + * To avoid xdp data path in tun driver, sndbuf value should + * be < INT_MAX. + */ + sndbuf = INT_MAX - 1; } else { vif->type = VIRTIO_IF_TYPE_TAP; ifr.ifr_flags |= IFF_TAP; + sndbuf = INT_MAX; } vif->dev_instance = vif - vim->interfaces; @@ -197,7 +199,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) { host_if_name = (char *) args->host_if_name; clib_memcpy (ifr.ifr_name, host_if_name, - clib_min (IFNAMSIZ, strlen (host_if_name))); + clib_min (IFNAMSIZ, vec_len (host_if_name))); } else { @@ -207,15 +209,15 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } if (args->host_namespace) { - old_netns_fd = open ("/proc/self/ns/net", O_RDONLY); - if ((nfd = open_netns_fd ((char *) args->host_namespace)) == -1) + old_netns_fd = clib_netns_open (NULL /* self */); + if ((nfd = clib_netns_open (args->host_namespace)) == -1) { args->rv = VNET_API_ERROR_SYSCALL_ERROR_2; - args->error = clib_error_return_unix (0, "open_netns_fd '%s'", + args->error = clib_error_return_unix (0, "clib_netns_open '%s'", args->host_namespace); goto error; } - if (setns (nfd, CLONE_NEWNET) == -1) + if (clib_setns (nfd) == -1) { args->rv = VNET_API_ERROR_SYSCALL_ERROR_3; args->error = clib_error_return_unix (0, "setns '%s'", @@ -256,7 +258,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) else ifr.ifr_flags |= IFF_MULTI_QUEUE; - hdrsz = sizeof (struct virtio_net_hdr_v1); + hdrsz = sizeof (virtio_net_hdr_v1_t); if (args->tap_flags & TAP_FLAG_GSO) { offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; @@ -330,9 +332,9 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) vif->tap_fds[i], hdrsz); _IOCTL (vif->tap_fds[i], TUNSETVNETHDRSZ, &hdrsz); - j = INT_MAX; - tap_log_dbg (vif, "TUNSETSNDBUF: fd %d sndbuf %d", vif->tap_fds[i], j); - _IOCTL (vif->tap_fds[i], TUNSETSNDBUF, &j); + tap_log_dbg (vif, "TUNSETSNDBUF: fd %d sndbuf %d", vif->tap_fds[i], + sndbuf); + _IOCTL (vif->tap_fds[i], TUNSETSNDBUF, &sndbuf); tap_log_dbg (vif, "TUNSETOFFLOAD: fd %d offload 0x%lx", vif->tap_fds[i], offload); @@ -403,11 +405,11 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) after we change our net namespace */ if (args->host_namespace) { - old_netns_fd = open ("/proc/self/ns/net", O_RDONLY); - if ((nfd = open_netns_fd ((char *) args->host_namespace)) == -1) + old_netns_fd = clib_netns_open (NULL /* self */); + if ((nfd = clib_netns_open (args->host_namespace)) == -1) { args->rv = VNET_API_ERROR_SYSCALL_ERROR_2; - args->error = clib_error_return_unix (0, "open_netns_fd '%s'", + args->error = clib_error_return_unix (0, "clib_netns_open '%s'", args->host_namespace); goto error; } @@ -418,7 +420,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) args->rv = VNET_API_ERROR_NETLINK_ERROR; goto error; } - if (setns (nfd, CLONE_NEWNET) == -1) + if (clib_setns (nfd) == -1) { args->rv = VNET_API_ERROR_SYSCALL_ERROR_3; args->error = clib_error_return_unix (0, "setns '%s'", @@ -547,7 +549,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) /* switch back to old net namespace */ if (args->host_namespace) { - if (setns (old_netns_fd, CLONE_NEWNET) == -1) + if (clib_setns (old_netns_fd) == -1) { args->rv = VNET_API_ERROR_SYSCALL_ERROR_2; args->error = clib_error_return_unix (0, "setns '%s'", @@ -576,7 +578,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } /* setup features and memtable */ - i = sizeof (struct vhost_memory) + sizeof (struct vhost_memory_region); + i = sizeof (vhost_memory_t) + sizeof (vhost_memory_region_t); vhost_mem = clib_mem_alloc (i); clib_memset (vhost_mem, 0, i); vhost_mem->nregions = 1; @@ -606,9 +608,9 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) /* finish initializing queue pair */ for (i = 0; i < num_vhost_queues * 2; i++) { - struct vhost_vring_addr addr = { 0 }; - struct vhost_vring_state state = { 0 }; - struct vhost_vring_file file = { 0 }; + vhost_vring_addr_t addr = { 0 }; + vhost_vring_state_t state = { 0 }; + vhost_vring_file_t file = { 0 }; virtio_vring_t *vring; u16 qp = i >> 1; int fd = vif->vhost_fds[qp]; @@ -708,31 +710,45 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) args->sw_if_index = vif->sw_if_index; args->rv = 0; hw = vnet_get_hw_interface (vnm, vif->hw_if_index); - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE; if (args->tap_flags & TAP_FLAG_GSO) { - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | - VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO | + VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | + VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM; } else if (args->tap_flags & TAP_FLAG_CSUM_OFFLOAD) { - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | + VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM; } - vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, - virtio_input_node.index); - - for (i = 0; i < vif->num_rxqs; i++) + if ((args->tap_flags & TAP_FLAG_GSO) + && (args->tap_flags & TAP_FLAG_GRO_COALESCE)) + { + virtio_set_packet_coalesce (vif); + } + if (vif->type == VIRTIO_IF_TYPE_TUN) { - vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0); - vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i, - VNET_HW_INTERFACE_RX_MODE_DEFAULT); - virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i)); + hw->max_supported_packet_bytes = TUN_MAX_PACKET_BYTES; + hw->min_packet_bytes = hw->min_supported_packet_bytes = + TUN_MIN_PACKET_BYTES; + hw->max_packet_bytes = + args->host_mtu_size ? args->host_mtu_size : TUN_DEFAULT_PACKET_BYTES; + vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, hw->max_packet_bytes); } + virtio_vring_set_rx_queues (vm, vif); + vif->per_interface_next_index = ~0; vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + /* + * Host tun/tap driver link carrier state is "up" at creation. The + * driver never changes this unless the backend (VPP) changes it using + * TUNSETCARRIER ioctl(). See tap_set_carrier(). + */ + vif->host_carrier_up = 1; vif->cxq_vring = NULL; goto done; @@ -761,7 +777,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *mm = &virtio_main; - int i; virtio_if_t *vif; vnet_hw_interface_t *hw; @@ -777,8 +792,6 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) /* bring down the interface */ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0); - for (i = 0; i < vif->num_rxqs; i++) - vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i); if (vif->type == VIRTIO_IF_TYPE_TAP) ethernet_delete_interface (vnm, vif->hw_if_index); @@ -815,28 +828,21 @@ tap_csum_offload_enable_disable (vlib_main_t * vm, u32 sw_if_index, vec_foreach_index (i, vif->tap_fds) _IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload); vif->gso_enabled = 0; + vif->packet_coalesce = 0; vif->csum_offload_enabled = enable_disable ? 1 : 0; - if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) != 0) + if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0) { - hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO; + hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO; } if (enable_disable) { - if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD) == - 0) - { - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; - } + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM; } else { - if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD) != - 0) - { - hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; - } + hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM; } error: @@ -850,7 +856,8 @@ error: } int -tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable) +tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable, + int is_packet_coalesce) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *mm = &virtio_main; @@ -875,19 +882,24 @@ tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable) vif->csum_offload_enabled = 0; if (enable_disable) { - if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0) + if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) == 0) + { + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO | + VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM; + } + if (is_packet_coalesce) { - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | - VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + virtio_set_packet_coalesce (vif); } } else { - if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) != 0) + if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0) { - hw->flags &= ~(VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | - VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD); + hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO | + VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM); } + vif->packet_coalesce = 0; } error: @@ -912,7 +924,7 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) tap_interface_details_t *tapid = NULL; /* *INDENT-OFF* */ - pool_foreach (vif, mm->interfaces, + pool_foreach (vif, mm->interfaces) { if ((vif->type != VIRTIO_IF_TYPE_TAP) && (vif->type != VIRTIO_IF_TYPE_TUN)) continue; @@ -922,8 +934,7 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) tapid->sw_if_index = vif->sw_if_index; hi = vnet_get_hw_interface (vnm, vif->hw_if_index); clib_memcpy(tapid->dev_name, hi->name, - MIN (ARRAY_LEN (tapid->dev_name) - 1, - strlen ((const char *) hi->name))); + MIN (ARRAY_LEN (tapid->dev_name) - 1, vec_len (hi->name))); vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS(0)); tapid->rx_ring_sz = vring->size; vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS(0)); @@ -934,19 +945,19 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) { clib_memcpy(tapid->host_if_name, vif->host_if_name, MIN (ARRAY_LEN (tapid->host_if_name) - 1, - strlen ((const char *) vif->host_if_name))); + vec_len (vif->host_if_name))); } if (vif->net_ns) { clib_memcpy(tapid->host_namespace, vif->net_ns, MIN (ARRAY_LEN (tapid->host_namespace) - 1, - strlen ((const char *) vif->net_ns))); + vec_len (vif->net_ns))); } if (vif->host_bridge) { clib_memcpy(tapid->host_bridge, vif->host_bridge, MIN (ARRAY_LEN (tapid->host_bridge) - 1, - strlen ((const char *) vif->host_bridge))); + vec_len (vif->host_bridge))); } if (vif->host_ip4_prefix_len) clib_memcpy(tapid->host_ip4_addr.as_u8, &vif->host_ip4_addr, 4); @@ -955,7 +966,7 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) clib_memcpy(tapid->host_ip6_addr.as_u8, &vif->host_ip6_addr, 16); tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len; tapid->host_mtu_size = vif->host_mtu_size; - ); + } /* *INDENT-ON* */ *out_tapids = r_tapids; @@ -963,6 +974,41 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) return 0; } +/* + * Set host tap/tun interface carrier state so it will appear to host + * applications that the interface's link state changed. + * + * If the kernel we're building against does not have support for the + * TUNSETCARRIER ioctl command, do nothing. + */ +int +tap_set_carrier (u32 hw_if_index, u32 carrier_up) +{ + int ret = 0; +#ifdef TUNSETCARRIER + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + virtio_main_t *mm = &virtio_main; + virtio_if_t *vif; + int *fd; + + vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + vec_foreach (fd, vif->tap_fds) + { + ret = ioctl (*fd, TUNSETCARRIER, &carrier_up); + if (ret < 0) + { + clib_warning ("ioctl (TUNSETCARRIER) returned %d", ret); + break; + } + } + if (!ret) + vif->host_carrier_up = (carrier_up != 0); +#endif + + return ret; +} + static clib_error_t * tap_mtu_config (vlib_main_t * vm, unformat_input_t * input) { @@ -980,6 +1026,85 @@ tap_mtu_config (vlib_main_t * vm, unformat_input_t * input) return 0; } +/* + * Set host tap/tun interface speed in Mbps. + */ +int +tap_set_speed (u32 hw_if_index, u32 speed) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + virtio_main_t *mm = &virtio_main; + virtio_if_t *vif; + int old_netns_fd = -1; + int nfd = -1; + int ctl_fd = -1; + struct ifreq ifr; + struct ethtool_cmd ecmd; + int ret = -1; + + vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + + if (vif->net_ns) + { + old_netns_fd = clib_netns_open (NULL /* self */); + if ((nfd = clib_netns_open (vif->net_ns)) == -1) + { + clib_warning ("Cannot open netns"); + goto done; + } + if (clib_setns (nfd) == -1) + { + clib_warning ("Cannot set ns"); + goto done; + } + } + + if ((ctl_fd = socket (AF_INET, SOCK_STREAM, 0)) == -1) + { + clib_warning ("Cannot open control socket"); + goto done; + } + + ecmd.cmd = ETHTOOL_GSET; + clib_memset (&ifr, 0, sizeof (ifr)); + clib_memcpy (ifr.ifr_name, vif->host_if_name, + strlen ((const char *) vif->host_if_name)); + ifr.ifr_data = (void *) &ecmd; + if ((ret = ioctl (ctl_fd, SIOCETHTOOL, &ifr)) < 0) + { + clib_warning ("Cannot get device settings"); + goto done; + } + + if (ethtool_cmd_speed (&ecmd) != speed) + { + ecmd.cmd = ETHTOOL_SSET; + ethtool_cmd_speed_set (&ecmd, speed); + if ((ret = ioctl (ctl_fd, SIOCETHTOOL, &ifr)) < 0) + { + clib_warning ("Cannot set device settings"); + goto done; + } + } + +done: + if (old_netns_fd != -1) + { + if (clib_setns (old_netns_fd) == -1) + { + clib_warning ("Cannot set old ns"); + } + close (old_netns_fd); + } + if (nfd != -1) + close (nfd); + if (ctl_fd != -1) + close (ctl_fd); + + return ret; +} + /* tap { host-mtu } configuration. */ VLIB_CONFIG_FUNCTION (tap_mtu_config, "tap");