From 2df2e3d7d5226891e974207aa10d1d6e69418cbf Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Wed, 2 Mar 2016 10:01:43 +0100 Subject: [PATCH] TAP: Jumbo-frame support. Change-Id: I3a0726d7645f775738253d0a47ee04d94d138c9a Signed-off-by: Ole Troan --- vlib/vlib/buffer.c | 9 +++- vlib/vlib/buffer_funcs.h | 1 + vlib/vlib/dpdk_buffer.c | 19 +++++++ vnet/vnet/unix/tapcli.c | 125 +++++++++++++++++++---------------------------- 4 files changed, 79 insertions(+), 75 deletions(-) diff --git a/vlib/vlib/buffer.c b/vlib/vlib/buffer.c index 332b43044d3..c28a0c54889 100644 --- a/vlib/vlib/buffer.c +++ b/vlib/vlib/buffer.c @@ -1248,6 +1248,14 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm, return copied; } +/* + * Fills in the required rte_mbuf fields for chained buffers given a VLIB chain. + */ +void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * b_first) +{ + return; +} + static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) { vlib_main_t * vm; @@ -1460,4 +1468,3 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { .short_help = "Show packet buffer allocation", .function = show_buffers, }; - diff --git a/vlib/vlib/buffer_funcs.h b/vlib/vlib/buffer_funcs.h index 2ea506a094e..eea417a94f0 100644 --- a/vlib/vlib/buffer_funcs.h +++ b/vlib/vlib/buffer_funcs.h @@ -506,6 +506,7 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm, vlib_buffer_t *first, vlib_buffer_t **last, void * data, u16 data_len); +void vlib_buffer_chain_validate(vlib_main_t *vm, vlib_buffer_t *first); format_function_t format_vlib_buffer, format_vlib_buffer_and_data, format_vlib_buffer_contents; diff --git a/vlib/vlib/dpdk_buffer.c b/vlib/vlib/dpdk_buffer.c index 145720dd7a4..04a6447d633 100644 --- a/vlib/vlib/dpdk_buffer.c +++ b/vlib/vlib/dpdk_buffer.c @@ -910,6 +910,25 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm, return copied; } +/* + * Fills in the required rte_mbuf fields for chained buffers given a VLIB chain. + */ +void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * b_first) +{ + vlib_buffer_t *b = b_first, *prev = b_first; + struct rte_mbuf *mb_first = ((struct rte_mbuf *) b) - 1; + + mb_first->pkt_len = mb_first-> data_len = b_first->current_length; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) { + b = vlib_get_buffer(vm, b->next_buffer); + mb_first->nb_segs++; + mb_first->pkt_len += b->current_length; + (((struct rte_mbuf *) prev) - 1)->next = (((struct rte_mbuf *) b) - 1); + (((struct rte_mbuf *) b) - 1)->data_len = b->current_length; + prev = b; + } +} + clib_error_t * vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs, unsigned mbuf_size, unsigned socket_id) diff --git a/vnet/vnet/unix/tapcli.c b/vnet/vnet/unix/tapcli.c index 44af321f796..4421ffb918d 100644 --- a/vnet/vnet/unix/tapcli.c +++ b/vnet/vnet/unix/tapcli.c @@ -139,7 +139,7 @@ tapcli_tx (vlib_main_t * vm, /* Use the sup intfc to finesse vlan subifs */ hw = vnet_get_sup_hw_interface (tm->vnet_main, tx_sw_if_index); tx_sw_if_index = hw->sw_if_index; - + p = hash_get (tm->tapcli_interface_index_by_sw_if_index, tx_sw_if_index); if (p == 0) @@ -177,8 +177,7 @@ tapcli_tx (vlib_main_t * vm, clib_unix_warning ("writev"); } - /* interface output path flattens buffer chains */ - vlib_buffer_free_no_next (vm, buffers, n_packets); + vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors); return n_packets; } @@ -204,19 +203,14 @@ tapcli_rx (vlib_main_t * vm, vlib_frame_t * frame) { tapcli_main_t * tm = &tapcli_main; - vlib_buffer_t * b; + vlib_buffer_t *b_first; u32 bi; -#if DPDK == 0 + vlib_buffer_free_list_t *fl; const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; - u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; -#else - dpdk_main_t * dm = &dpdk_main; - const uword buffer_size = MBUF_SIZE; - u32 free_list_index = dm->vlib_buffer_free_list_index; -#endif static u32 * ready_interface_indices; tapcli_interface_t * ti; int i; + word n_bytes_in_packet; vec_reset_length (ready_interface_indices); @@ -228,6 +222,8 @@ tapcli_rx (vlib_main_t * vm, if (vec_len (ready_interface_indices) == 0) return 1; + fl = vlib_buffer_get_free_list(vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + for (i = 0; i < vec_len(ready_interface_indices); i++) { /* Clear the "interrupt" bit */ @@ -241,28 +237,20 @@ tapcli_rx (vlib_main_t * vm, { uword n_left = vec_len (tm->rx_buffers); uword n_alloc; - - if (n_left < VLIB_FRAME_SIZE / 2) - { - if (! tm->rx_buffers) - vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); - - n_alloc = vlib_buffer_alloc_from_free_list - (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left, - free_list_index); - _vec_len (tm->rx_buffers) = n_left + n_alloc; - } + if (n_left < VLIB_FRAME_SIZE / 2) { + vec_validate(tm->rx_buffers, VLIB_FRAME_SIZE + n_left - 1); + n_alloc = vlib_buffer_alloc(vm, &tm->rx_buffers[n_left], VLIB_FRAME_SIZE); + n_left += n_alloc; + _vec_len (tm->rx_buffers) = n_left; + } } /* Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ { uword i_rx = vec_len (tm->rx_buffers) - 1; - vlib_buffer_t * b; - word j, n_bytes_left, n_bytes_in_packet; -#if DPDK == 1 - u8 out_of_dpdk_buffers = 0; -#endif + vlib_buffer_t * b, *prev = 0; + word j, n_bytes_left; /* We need enough buffers left for an MTU sized packet. */ if (PREDICT_FALSE(vec_len (tm->rx_buffers) < tm->mtu_buffers)) @@ -277,15 +265,11 @@ tapcli_rx (vlib_main_t * vm, for (j = 0; j < tm->mtu_buffers; j++) { b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); + vlib_buffer_init_for_free_list (b, fl); tm->iovecs[j].iov_base = b->data; tm->iovecs[j].iov_len = buffer_size; } -#if DPDK == 1 - if (PREDICT_FALSE(out_of_dpdk_buffers == 1)) - continue; -#endif - n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) @@ -296,36 +280,29 @@ tapcli_rx (vlib_main_t * vm, } bi = tm->rx_buffers[i_rx]; - while (1) - { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + b = b_first = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); - b->flags = 0; - b->current_data = 0; - b->current_length = n_bytes_left < buffer_size - ? n_bytes_left : buffer_size; + while (1) { + u32 bi; - n_bytes_left -= buffer_size; + vlib_buffer_init_for_free_list(b, fl); - if (n_bytes_left <= 0) - { -#if DPDK == 1 - struct rte_mbuf *mb = (struct rte_mbuf *)(b - 1); - rte_pktmbuf_data_len (mb) = n_bytes_in_packet; - rte_pktmbuf_pkt_len (mb) = n_bytes_in_packet; -#endif - break; - } - - i_rx--; - b->flags |= VLIB_BUFFER_NEXT_PRESENT; - b->next_buffer = tm->rx_buffers[i_rx]; -#if DPDK == 1 - ASSERT(0); /* $$$$ fixme */ - /* ((struct rte_pktmbuf *)(b->mb))->next = - vlib_get_buffer (vm, tm->rx_buffers[i_rx])->mb; */ -#endif - } + b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; + n_bytes_left -= buffer_size; + + if (prev) { + prev->next_buffer = bi; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + prev = b; + + /* last segment */ + if (n_bytes_left <= 0) break; + + i_rx--; + bi = tm->rx_buffers[i_rx]; + b = vlib_get_buffer (vm, bi); + } /* Interface counters for tapcli interface. */ vlib_increment_combined_counter @@ -338,23 +315,27 @@ tapcli_rx (vlib_main_t * vm, _vec_len (tm->rx_buffers) = i_rx; } - b = vlib_get_buffer (vm, bi); + b_first->total_length_not_including_first_buffer = (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0; + b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + /* Ensure mbufs are updated */ + vlib_buffer_chain_validate(vm, b_first); /* * Turn this on if you run into * "bad monkey" contexts, and you want to know exactly * which nodes they've visited... See .../vlib/vlib/buffer.h */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b_first); { u32 next_index; uword n_trace = vlib_get_trace_count (vm, node); - vnet_buffer (b)->sw_if_index[VLIB_RX] = ti->sw_if_index; - vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer (b_first)->sw_if_index[VLIB_RX] = ti->sw_if_index; + vnet_buffer (b_first)->sw_if_index[VLIB_TX] = (u32)~0; - b->error = node->errors[0]; + b_first->error = node->errors[0]; { next_index = TAPCLI_RX_NEXT_ETHERNET_INPUT; @@ -370,18 +351,17 @@ tapcli_rx (vlib_main_t * vm, next_index = TAPCLI_RX_NEXT_DROP; } - vlib_set_next_frame_buffer (vm, node, next_index, bi); if (n_trace > 0) { vlib_trace_buffer (vm, node, next_index, - b, /* follow_chain */ 1); + b_first, /* follow_chain */ 1); vlib_set_trace_count (vm, node, n_trace - 1); } } } - + return 1; } @@ -433,11 +413,7 @@ static clib_error_t * tapcli_config (vlib_main_t * vm, unformat_input_t * input) { tapcli_main_t *tm = &tapcli_main; -#if DPDK == 0 const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; -#else - const uword buffer_size = MBUF_SIZE; -#endif while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -604,6 +580,7 @@ VNET_DEVICE_CLASS (tapcli_dev_class,static) = { .rx_redirect_to_node = tapcli_set_interface_next_node, .name_renumber = tap_name_renumber, .admin_up_down_function = tapcli_interface_admin_up_down, + .no_flatten_output_chains = 1, }; int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids) @@ -790,6 +767,7 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, { vnet_hw_interface_t * hw; hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); + hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = tm->mtu_bytes - sizeof(ethernet_header_t); ti->sw_if_index = hw->sw_if_index; if (sw_if_indexp) *sw_if_indexp = hw->sw_if_index; @@ -1004,7 +982,7 @@ tap_connect_command_fn (vlib_main_t * vm, clib_error_t * error; int user_hwaddr = 0; u8 hwaddr[6]; - + if (tm->is_disabled) { return clib_error_return (0, "device disabled..."); @@ -1150,6 +1128,7 @@ tap_connect_command_fn (vlib_main_t * vm, vnet_hw_interface_t * hw; hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); ti->sw_if_index = hw->sw_if_index; + hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = tm->mtu_bytes - sizeof(ethernet_header_t); } ti->active = 1; @@ -1196,5 +1175,3 @@ tapcli_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (tapcli_init); - - -- 2.16.6