X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Faf_packet%2Faf_packet.c;h=ac6fc05a627bdb6127595f4751c8847a37006735;hb=5a7aa51f0;hp=5526d72e7a5bdacff4a4d5ba48fd8a2f97051937;hpb=3414977152ae6362277158dc732e6b9958a6e618;p=vpp.git diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 5526d72e7a5..ac6fc05a627 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -47,9 +48,9 @@ VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = { #define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB #define AF_PACKET_TX_BLOCK_NR 1 -#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 256 +#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32 #define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048 -#define AF_PACKET_RX_BLOCK_NR 20 +#define AF_PACKET_RX_BLOCK_NR 160 /*defined in net/if.h but clashes with dpdk headers */ unsigned int if_nametoindex (const char *ifname); @@ -98,13 +99,10 @@ af_packet_read_mtu (af_packet_if_t *apif) static clib_error_t * af_packet_fd_read_ready (clib_file_t * uf) { - af_packet_main_t *apm = &af_packet_main; vnet_main_t *vnm = vnet_get_main (); - u32 idx = uf->private_data; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx); /* Schedule the rx node */ - vnet_hw_if_rx_queue_set_int_pending (vnm, apif->queue_index); + vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data); return 0; } @@ -127,10 +125,74 @@ is_bridge (const u8 * host_if_name) return -1; } +static void +af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif) +{ + vnet_main_t *vnm = vnet_get_main (); + af_packet_queue_t *rx_queue; + + vnet_hw_if_set_input_node (vnm, apif->hw_if_index, + af_packet_input_node.index); + + vec_foreach (rx_queue, apif->rx_queues) + { + rx_queue->queue_index = vnet_hw_if_register_rx_queue ( + vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY); + + { + clib_file_t template = { 0 }; + template.read_function = af_packet_fd_read_ready; + template.file_descriptor = rx_queue->fd; + template.private_data = rx_queue->queue_index; + template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; + template.description = + format (0, "%U queue %u", format_af_packet_device_name, + apif->dev_instance, rx_queue->queue_id); + rx_queue->clib_file_index = clib_file_add (&file_main, &template); + } + vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index, + rx_queue->clib_file_index); + vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index, + VNET_HW_IF_RX_MODE_INTERRUPT); + rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT; + } + vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); +} + +static void +af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif) +{ + vnet_main_t *vnm = vnet_get_main (); + af_packet_main_t *apm = &af_packet_main; + af_packet_queue_t *tx_queue; + + vec_foreach (tx_queue, apif->tx_queues) + { + tx_queue->queue_index = vnet_hw_if_register_tx_queue ( + vnm, apif->hw_if_index, tx_queue->queue_id); + } + + if (apif->num_txqs == 0) + { + vlib_log_err (apm->log_class, "Interface %U has 0 txq", + format_vnet_hw_if_index_name, vnm, apif->hw_if_index); + return; + } + + for (u32 j = 0; j < vlib_get_n_threads (); j++) + { + u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index; + vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); + } + + vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); +} + static int create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, - tpacket_req3_t *tx_req, int *fd, u8 **ring, - u32 *hdrlen_ptr, u8 *is_cksum_gso_enabled) + tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring, + u32 *hdrlen_ptr, u8 *is_cksum_gso_enabled, + u32 fanout_id, u8 is_fanout) { af_packet_main_t *apm = &af_packet_main; struct sockaddr_ll sll; @@ -139,14 +201,19 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, int ver = TPACKET_V3; u32 hdrlen = 0; u32 len = sizeof (hdrlen); - u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr + - tx_req->tp_block_size * tx_req->tp_block_nr; + u32 ring_sz = 0; + + if (rx_req) + ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr; + + if (tx_req) + ring_sz += tx_req->tp_block_size * tx_req->tp_block_nr; if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) { - vlib_log_debug (apm->log_class, - "Failed to create AF_PACKET socket: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to create AF_PACKET socket: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } @@ -158,26 +225,25 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, sll.sll_ifindex = host_if_index; if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0) { - vlib_log_debug (apm->log_class, - "Failed to bind rx packet socket: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to bind rx packet socket: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0) { - vlib_log_debug ( - apm->log_class, - "Failed to set rx packet interface version: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to set rx packet interface version: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } if (getsockopt (*fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &len) < 0) { - vlib_log_debug ( + vlib_log_err ( apm->log_class, "Failed to get packet hdr len error handling option: %s (errno %d)", strerror (errno), errno); @@ -190,7 +256,7 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, int opt = 1; if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0) { - vlib_log_debug ( + vlib_log_err ( apm->log_class, "Failed to set packet tx ring error handling option: %s (errno %d)", strerror (errno), errno); @@ -221,34 +287,52 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, } #endif - if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) + if (is_fanout) { - vlib_log_debug (apm->log_class, - "Failed to set packet rx ring options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; + int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16)); + if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout, + sizeof (fanout)) < 0) + { + vlib_log_err (apm->log_class, + "Failed to set fanout options: %s (errno %d)", + strerror (errno), errno); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } } - if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) - { - vlib_log_debug (apm->log_class, + if (rx_req) + if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) + { + vlib_log_err (apm->log_class, + "Failed to set packet rx ring options: %s (errno %d)", + strerror (errno), errno); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + if (tx_req) + if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) + { + vlib_log_err (apm->log_class, "Failed to set packet tx ring options: %s (errno %d)", strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } - *ring = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, - *fd, 0); - if (*ring == MAP_FAILED) + ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED, *fd, 0); + if (ring->ring_start_addr == MAP_FAILED) { - vlib_log_debug (apm->log_class, "mmap failure: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } + ring->ring_size = ring_sz; + return 0; error: if (*fd >= 0) @@ -260,30 +344,192 @@ error: } int -af_packet_create_if (af_packet_create_if_arg_t *arg) +af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, + af_packet_create_if_arg_t *arg, + af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue, + u8 queue_id, u8 is_fanout) { af_packet_main_t *apm = &af_packet_main; - vlib_main_t *vm = vlib_get_main (); - int ret, fd = -1, fd2 = -1; tpacket_req3_t *rx_req = 0; tpacket_req3_t *tx_req = 0; + int ret, fd = -1; + af_packet_ring_t ring = { 0 }; + u8 *ring_addr = 0; + u32 rx_frames_per_block, tx_frames_per_block; + u32 rx_frame_size, tx_frame_size; + u32 hdrlen = 0; + u32 i = 0; + u8 is_cksum_gso_enabled = 0; + + if (rx_queue) + { + rx_frames_per_block = arg->rx_frames_per_block ? + arg->rx_frames_per_block : + AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; + + rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size : + AF_PACKET_DEFAULT_RX_FRAME_SIZE; + vec_validate (rx_queue->rx_req, 0); + rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; + rx_queue->rx_req->tp_frame_size = rx_frame_size; + rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; + rx_queue->rx_req->tp_frame_nr = + AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; + rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout + rx_queue->rx_req->tp_feature_req_word = 0; + rx_queue->rx_req->tp_sizeof_priv = 0; + rx_req = rx_queue->rx_req; + } + + if (tx_queue) + { + tx_frames_per_block = arg->tx_frames_per_block ? + arg->tx_frames_per_block : + AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK; + tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size : + AF_PACKET_DEFAULT_TX_FRAME_SIZE; + + vec_validate (tx_queue->tx_req, 0); + tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; + tx_queue->tx_req->tp_frame_size = tx_frame_size; + tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; + tx_queue->tx_req->tp_frame_nr = + AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; + tx_queue->tx_req->tp_retire_blk_tov = 0; + tx_queue->tx_req->tp_sizeof_priv = 0; + tx_queue->tx_req->tp_feature_req_word = 0; + tx_req = tx_queue->tx_req; + } + + ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring, + &hdrlen, &is_cksum_gso_enabled, + apif->dev_instance, is_fanout); + + if (ret != 0) + goto error; + + vec_add1 (apif->rings, ring); + ring_addr = ring.ring_start_addr; + + if (rx_queue) + { + rx_queue->fd = fd; + vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1); + vec_foreach_index (i, rx_queue->rx_ring) + { + rx_queue->rx_ring[i] = + ring_addr + i * rx_queue->rx_req->tp_block_size; + } + + rx_queue->next_rx_block = 0; + rx_queue->queue_id = queue_id; + rx_queue->is_rx_pending = 0; + ring_addr = ring_addr + rx_queue->rx_req->tp_block_size * + rx_queue->rx_req->tp_block_nr; + } + + if (tx_queue) + { + tx_queue->fd = fd; + vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1); + vec_foreach_index (i, tx_queue->tx_ring) + { + tx_queue->tx_ring[i] = + ring_addr + i * tx_queue->tx_req->tp_block_size; + } + + tx_queue->next_tx_frame = 0; + tx_queue->queue_id = queue_id; + clib_spinlock_init (&tx_queue->lockp); + } + + if (queue_id == 0) + { + apif->hdrlen = hdrlen; + apif->is_cksum_gso_enabled = is_cksum_gso_enabled; + } + + return 0; +error: + vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id); + vec_free (rx_queue->rx_req); + vec_free (tx_queue->tx_req); + return ret; +} + +int +af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif, + af_packet_create_if_arg_t *args) +{ + af_packet_main_t *apm = &af_packet_main; + af_packet_queue_t *rx_queue = 0; + af_packet_queue_t *tx_queue = 0; + u16 nq = clib_min (args->num_rxqs, args->num_txqs); + u16 i = 0; + int ret = 0; + u8 is_fanout = (args->num_rxqs > 1) ? 1 : 0; + + vec_validate (apif->rx_queues, args->num_rxqs - 1); + vec_validate (apif->tx_queues, args->num_txqs - 1); + + for (; i < nq; i++) + { + rx_queue = vec_elt_at_index (apif->rx_queues, i); + tx_queue = vec_elt_at_index (apif->tx_queues, i); + ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i, + is_fanout); + if (ret != 0) + goto error; + } + + if (args->num_rxqs > args->num_txqs) + { + for (; i < args->num_rxqs; i++) + { + rx_queue = vec_elt_at_index (apif->rx_queues, i); + ret = + af_packet_queue_init (vm, apif, args, rx_queue, 0, i, is_fanout); + if (ret != 0) + goto error; + } + } + else if (args->num_txqs > args->num_rxqs) + { + for (; i < args->num_txqs; i++) + { + tx_queue = vec_elt_at_index (apif->tx_queues, i); + ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i, 0); + if (ret != 0) + goto error; + } + } + + apif->num_rxqs = args->num_rxqs; + apif->num_txqs = args->num_txqs; + + return 0; +error: + vlib_log_err (apm->log_class, "Failed to init device error"); + return ret; +} + +int +af_packet_create_if (af_packet_create_if_arg_t *arg) +{ + af_packet_main_t *apm = &af_packet_main; + vlib_main_t *vm = vlib_get_main (); + int fd2 = -1; struct ifreq ifr; - u8 *ring = 0; af_packet_if_t *apif = 0; u8 hw_addr[6]; vnet_sw_interface_t *sw; - vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_main_t *vnm = vnet_get_main (); vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE; uword *p; uword if_index; u8 *host_if_name_dup = 0; int host_if_index = -1; - u32 rx_frames_per_block, tx_frames_per_block; - u32 rx_frame_size, tx_frame_size; - u32 hdrlen = 0; - u32 i = 0; - u8 is_cksum_gso_enabled = 0; + int ret = 0; p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name); if (p) @@ -295,35 +541,6 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) host_if_name_dup = vec_dup (arg->host_if_name); - rx_frames_per_block = arg->rx_frames_per_block ? - arg->rx_frames_per_block : - AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; - tx_frames_per_block = arg->tx_frames_per_block ? - arg->tx_frames_per_block : - AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK; - rx_frame_size = - arg->rx_frame_size ? arg->rx_frame_size : AF_PACKET_DEFAULT_RX_FRAME_SIZE; - tx_frame_size = - arg->tx_frame_size ? arg->tx_frame_size : AF_PACKET_DEFAULT_TX_FRAME_SIZE; - - vec_validate (rx_req, 0); - rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; - rx_req->tp_frame_size = rx_frame_size; - rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; - rx_req->tp_frame_nr = AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; - rx_req->tp_retire_blk_tov = 0; - rx_req->tp_feature_req_word = 0; - rx_req->tp_sizeof_priv = 0; - - vec_validate (tx_req, 0); - tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; - tx_req->tp_frame_size = tx_frame_size; - tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; - tx_req->tp_frame_nr = AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; - tx_req->tp_retire_blk_tov = 0; - tx_req->tp_sizeof_priv = 0; - tx_req->tp_feature_req_word = 0; - /* * make sure host side of interface is 'UP' before binding AF_PACKET * socket on it. @@ -378,14 +595,7 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) fd2 = -1; } - ret = create_packet_v3_sock (host_if_index, rx_req, tx_req, &fd, &ring, - &hdrlen, &is_cksum_gso_enabled); - - if (ret != 0) - goto error; - ret = is_bridge (arg->host_if_name); - if (ret == 0) /* is a bridge, ignore state */ host_if_index = -1; @@ -393,40 +603,20 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) pool_get (apm->interfaces, apif); if_index = apif - apm->interfaces; + apif->dev_instance = if_index; apif->host_if_index = host_if_index; - apif->fd = fd; - - vec_validate (apif->rx_ring, rx_req->tp_block_nr - 1); - vec_foreach_index (i, apif->rx_ring) - { - apif->rx_ring[i] = ring + i * rx_req->tp_block_size; - } - - ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr; - - vec_validate (apif->tx_ring, tx_req->tp_block_nr - 1); - vec_foreach_index (i, apif->tx_ring) - { - apif->tx_ring[i] = ring + i * tx_req->tp_block_size; - } - - apif->rx_req = rx_req; - apif->tx_req = tx_req; apif->host_if_name = host_if_name_dup; apif->per_interface_next_index = ~0; - apif->next_tx_frame = 0; - apif->next_rx_block = 0; apif->mode = arg->mode; - apif->hdrlen = hdrlen; - apif->is_cksum_gso_enabled = is_cksum_gso_enabled; - apif->ss.is_save = 0; + + ret = af_packet_device_init (vm, apif, arg); + if (ret != 0) + goto error; ret = af_packet_read_mtu (apif); if (ret != 0) goto error; - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&apif->lockp); if (apif->mode != AF_PACKET_IF_MODE_IP) { @@ -447,7 +637,7 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) } eir.dev_class_index = af_packet_device_class.index; - eir.dev_instance = if_index; + eir.dev_instance = apif->dev_instance; eir.address = hw_addr; eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size; apif->hw_if_index = vnet_eth_register_interface (vnm, &eir); @@ -455,15 +645,15 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) else { apif->hw_if_index = vnet_register_interface ( - vnm, af_packet_device_class.index, if_index, - af_packet_ip_device_hw_interface_class.index, if_index); + vnm, af_packet_device_class.index, apif->dev_instance, + af_packet_ip_device_hw_interface_class.index, apif->dev_instance); } + sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; - vnet_hw_if_set_input_node (vnm, apif->hw_if_index, - af_packet_input_node.index); - apif->queue_index = vnet_hw_if_register_rx_queue (vnm, apif->hw_if_index, 0, - VNET_HW_IF_RXQ_THREAD_ANY); + + af_packet_set_rx_queues (vm, apif); + af_packet_set_tx_queues (vm, apif); if (apif->is_cksum_gso_enabled) caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM | @@ -473,22 +663,6 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); - vnet_hw_if_set_rx_queue_mode (vnm, apif->queue_index, - VNET_HW_IF_RX_MODE_INTERRUPT); - vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); - { - clib_file_t template = { 0 }; - template.read_function = af_packet_fd_read_ready; - template.file_descriptor = fd; - template.private_data = if_index; - template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; - template.description = - format (0, "%U", format_af_packet_device_name, if_index); - apif->clib_file_index = clib_file_add (&file_main, &template); - } - vnet_hw_if_set_rx_queue_file_index (vnm, apif->queue_index, - apif->clib_file_index); - mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index, 0); arg->sw_if_index = apif->sw_if_index; @@ -502,20 +676,65 @@ error: fd2 = -1; } vec_free (host_if_name_dup); - vec_free (rx_req); - vec_free (tx_req); + memset (apif, 0, sizeof (*apif)); + pool_put (apm->interfaces, apif); return ret; } +static int +af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue) +{ + clib_file_del_by_index (&file_main, rx_queue->clib_file_index); + close (rx_queue->fd); + rx_queue->fd = -1; + rx_queue->rx_ring = NULL; + vec_free (rx_queue->rx_req); + rx_queue->rx_req = NULL; + return 0; +} + +static int +af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue) +{ + close (tx_queue->fd); + tx_queue->fd = -1; + clib_spinlock_free (&tx_queue->lockp); + tx_queue->tx_ring = NULL; + vec_free (tx_queue->tx_req); + tx_queue->tx_req = NULL; + return 0; +} + +static int +af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring) +{ + af_packet_main_t *apm = &af_packet_main; + + if (ring) + { + // FIXME: unmap the memory + if (munmap (ring->ring_start_addr, ring->ring_size)) + vlib_log_warn (apm->log_class, + "Host interface %s could not free ring %p of size %u", + apif->host_if_name, ring->ring_start_addr, + ring->ring_size); + else + ring->ring_start_addr = 0; + } + + return 0; +} + int af_packet_delete_if (u8 *host_if_name) { vnet_main_t *vnm = vnet_get_main (); af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif; + af_packet_queue_t *rx_queue; + af_packet_queue_t *tx_queue; + af_packet_ring_t *ring; uword *p; - uword if_index; - u32 ring_sz; p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); if (p == NULL) @@ -525,46 +744,37 @@ af_packet_delete_if (u8 *host_if_name) return VNET_API_ERROR_SYSCALL_ERROR_1; } apif = pool_elt_at_index (apm->interfaces, p[0]); - if_index = apif - apm->interfaces; /* bring down the interface */ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0); /* clean up */ - if (apif->clib_file_index != ~0) - { - clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index); - apif->clib_file_index = ~0; - } - else - close (apif->fd); - - ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr + - apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr; - if (munmap (apif->rx_ring, ring_sz)) - vlib_log_warn (apm->log_class, - "Host interface %s could not free rx/tx ring", - host_if_name); - apif->rx_ring = NULL; - apif->tx_ring = NULL; - apif->fd = -1; - - vec_free (apif->rx_req); - apif->rx_req = NULL; - vec_free (apif->tx_req); - apif->tx_req = NULL; + vec_foreach (rx_queue, apif->rx_queues) + af_packet_rx_queue_free (apif, rx_queue); + vec_foreach (tx_queue, apif->tx_queues) + af_packet_tx_queue_free (apif, tx_queue); + vec_foreach (ring, apif->rings) + af_packet_ring_free (apif, ring); + + vec_free (apif->rx_queues); + apif->rx_queues = NULL; + vec_free (apif->tx_queues); + apif->tx_queues = NULL; + vec_free (apif->rings); + apif->rings = NULL; vec_free (apif->host_if_name); apif->host_if_name = NULL; apif->host_if_index = -1; - mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index); + mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p); if (apif->mode != AF_PACKET_IF_MODE_IP) ethernet_delete_interface (vnm, apif->hw_if_index); else vnet_delete_hw_interface (vnm, apif->hw_if_index); + memset (apif, 0, sizeof (*apif)); pool_put (apm->interfaces, apif); return 0;