X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fdevices%2Fvirtio%2Fvhost_user_input.c;h=97303ce379188621fd325ddb059671aa0355a5c3;hb=96e8cd0e1d6c21e5f47470c146958a9845ee29a6;hp=87a01cefd5bc41af2865a3ae69d4a346cdd8961f;hpb=6a8bfd43a057da68d43074d0abc3c598c5ccb55a;p=vpp.git diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/vnet/devices/virtio/vhost_user_input.c index 87a01cefd5b..97303ce3791 100644 --- a/src/vnet/devices/virtio/vhost_user_input.c +++ b/src/vnet/devices/virtio/vhost_user_input.c @@ -92,10 +92,10 @@ static __clib_unused char *vhost_user_input_func_error_strings[] = { static_always_inline void vhost_user_rx_trace (vhost_trace_t * t, vhost_user_intf_t * vui, u16 qid, - vlib_buffer_t * b, vhost_user_vring_t * txvq) + vlib_buffer_t * b, vhost_user_vring_t * txvq, + u16 last_avail_idx) { vhost_user_main_t *vum = &vhost_user_main; - u32 last_avail_idx = txvq->last_avail_idx; u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask]; vring_desc_t *hdr_desc = 0; virtio_net_hdr_mrg_rxbuf_t *hdr; @@ -195,25 +195,27 @@ vhost_user_rx_discard_packet (vlib_main_t * vm, */ u32 discarded_packets = 0; u32 avail_idx = txvq->avail->idx; + u16 mask = txvq->qsz_mask; + u16 last_avail_idx = txvq->last_avail_idx; + u16 last_used_idx = txvq->last_used_idx; while (discarded_packets != discard_max) { if (avail_idx == txvq->last_avail_idx) goto out; - u16 desc_chain_head = - txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; - txvq->last_avail_idx++; - txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = - desc_chain_head; - txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; - vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & txvq->qsz_mask]); - txvq->last_used_idx++; + u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask]; + last_avail_idx++; + txvq->used->ring[last_used_idx & mask].id = desc_chain_head; + txvq->used->ring[last_used_idx & mask].len = 0; + vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]); + last_used_idx++; discarded_packets++; } out: - CLIB_MEMORY_BARRIER (); + txvq->last_avail_idx = last_avail_idx; + txvq->last_used_idx = last_used_idx; + CLIB_MEMORY_STORE_BARRIER (); txvq->used->idx = txvq->last_used_idx; vhost_user_log_dirty_ring (vui, txvq, idx); return discarded_packets; @@ -222,7 +224,7 @@ out: /* * In case of overflow, we need to rewind the array of allocated buffers. */ -static __clib_unused void +static_always_inline void vhost_user_input_rewind_buffers (vlib_main_t * vm, vhost_cpu_t * cpu, vlib_buffer_t * b_head) { @@ -241,7 +243,7 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm, cpu->rx_buffers_len++; } -static __clib_unused u32 +static_always_inline u32 vhost_user_if_input (vlib_main_t * vm, vhost_user_main_t * vum, vhost_user_intf_t * vui, @@ -261,10 +263,11 @@ vhost_user_if_input (vlib_main_t * vm, u16 copy_len = 0; u8 feature_arc_idx = fm->device_input_feature_arc_index; u32 current_config_index = ~(u32) 0; + u16 mask = txvq->qsz_mask; /* The descriptor table is not ready yet */ if (PREDICT_FALSE (txvq->avail == 0)) - return 0; + goto done; { /* do we have pending interrupts ? */ @@ -299,13 +302,13 @@ vhost_user_if_input (vlib_main_t * vm, } if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) - return 0; + goto done; n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx); /* nothing to do */ if (PREDICT_FALSE (n_left == 0)) - return 0; + goto done; if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled))) { @@ -318,10 +321,10 @@ vhost_user_if_input (vlib_main_t * vm, */ vhost_user_rx_discard_packet (vm, vui, txvq, VHOST_USER_DOWN_DISCARD_COUNT); - return 0; + goto done; } - if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1))) + if (PREDICT_FALSE (n_left == (mask + 1))) { /* * Informational error logging when VPP is not @@ -382,223 +385,220 @@ vhost_user_if_input (vlib_main_t * vm, &next_index, 0); } + u16 last_avail_idx = txvq->last_avail_idx; + u16 last_used_idx = txvq->last_used_idx; + + vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next); + + if (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT) + { + /* give some hints to ethernet-input */ + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + nf = vlib_node_runtime_get_next_frame (vm, node, next_index); + f = vlib_get_frame (vm, nf->frame_index); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = vui->sw_if_index; + ef->hw_if_index = vui->hw_if_index; + } + while (n_left > 0) { - vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next); + vlib_buffer_t *b_head, *b_current; + u32 bi_current; + u16 desc_current; + u32 desc_data_offset; + vring_desc_t *desc_table = txvq->desc; - if (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT) + if (PREDICT_FALSE (cpu->rx_buffers_len <= 1)) { - /* give some hints to ethernet-input */ - vlib_next_frame_t *nf; - vlib_frame_t *f; - ethernet_input_frame_t *ef; - nf = vlib_node_runtime_get_next_frame (vm, node, next_index); - f = vlib_get_frame (vm, nf->frame_index); - f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; - - ef = vlib_frame_scalar_args (f); - ef->sw_if_index = vui->sw_if_index; - ef->hw_if_index = vui->hw_if_index; + /* Not enough rx_buffers + * Note: We yeld on 1 so we don't need to do an additional + * check for the next buffer prefetch. + */ + n_left = 0; + break; } - while (n_left > 0 && n_left_to_next > 0) + desc_current = txvq->avail->ring[last_avail_idx & mask]; + cpu->rx_buffers_len--; + bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; + b_head = b_current = vlib_get_buffer (vm, bi_current); + to_next[0] = bi_current; //We do that now so we can forget about bi_current + to_next++; + n_left_to_next--; + + vlib_prefetch_buffer_with_index + (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD); + + /* Just preset the used descriptor id and length for later */ + txvq->used->ring[last_used_idx & mask].id = desc_current; + txvq->used->ring[last_used_idx & mask].len = 0; + vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]); + + /* The buffer should already be initialized */ + b_head->total_length_not_including_first_buffer = 0; + b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (PREDICT_FALSE (n_trace)) { - vlib_buffer_t *b_head, *b_current; - u32 bi_current; - u16 desc_current; - u32 desc_data_offset; - vring_desc_t *desc_table = txvq->desc; + //TODO: next_index is not exactly known at that point + vlib_trace_buffer (vm, node, next_index, b_head, + /* follow_chain */ 0); + vhost_trace_t *t0 = + vlib_add_trace (vm, node, b_head, sizeof (t0[0])); + vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx); + n_trace--; + vlib_set_trace_count (vm, node, n_trace); + } - if (PREDICT_FALSE (cpu->rx_buffers_len <= 1)) + /* This depends on the setup but is very consistent + * So I think the CPU branch predictor will make a pretty good job + * at optimizing the decision. */ + if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT) + { + desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr, + &map_hint); + desc_current = 0; + if (PREDICT_FALSE (desc_table == 0)) { - /* Not enough rx_buffers - * Note: We yeld on 1 so we don't need to do an additional - * check for the next buffer prefetch. - */ - n_left = 0; - break; + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); + goto out; } + } - desc_current = - txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; - cpu->rx_buffers_len--; - bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; - b_head = b_current = vlib_get_buffer (vm, bi_current); - to_next[0] = bi_current; //We do that now so we can forget about bi_current - to_next++; - n_left_to_next--; - - vlib_prefetch_buffer_with_index - (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD); - - /* Just preset the used descriptor id and length for later */ - txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = - desc_current; - txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; - vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & - txvq->qsz_mask]); - - /* The buffer should already be initialized */ - b_head->total_length_not_including_first_buffer = 0; - b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - - if (PREDICT_FALSE (n_trace)) - { - //TODO: next_index is not exactly known at that point - vlib_trace_buffer (vm, node, next_index, b_head, - /* follow_chain */ 0); - vhost_trace_t *t0 = - vlib_add_trace (vm, node, b_head, sizeof (t0[0])); - vhost_user_rx_trace (t0, vui, qid, b_head, txvq); - n_trace--; - vlib_set_trace_count (vm, node, n_trace); - } + if (PREDICT_TRUE (vui->is_any_layout) || + (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT))) + { + /* ANYLAYOUT or single buffer */ + desc_data_offset = vui->virtio_net_hdr_sz; + } + else + { + /* CSR case without ANYLAYOUT, skip 1st buffer */ + desc_data_offset = desc_table[desc_current].len; + } - /* This depends on the setup but is very consistent - * So I think the CPU branch predictor will make a pretty good job - * at optimizing the decision. */ - if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT) + while (1) + { + /* Get more input if necessary. Or end of packet. */ + if (desc_data_offset == desc_table[desc_current].len) { - desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr, - &map_hint); - desc_current = 0; - if (PREDICT_FALSE (desc_table == 0)) + if (PREDICT_FALSE (desc_table[desc_current].flags & + VIRTQ_DESC_F_NEXT)) + { + desc_current = desc_table[desc_current].next; + desc_data_offset = 0; + } + else { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); goto out; } } - if (PREDICT_TRUE (vui->is_any_layout) || - (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT))) - { - /* ANYLAYOUT or single buffer */ - desc_data_offset = vui->virtio_net_hdr_sz; - } - else - { - /* CSR case without ANYLAYOUT, skip 1st buffer */ - desc_data_offset = desc_table[desc_current].len; - } - - while (1) + /* Get more output if necessary. Or end of packet. */ + if (PREDICT_FALSE + (b_current->current_length == VLIB_BUFFER_DATA_SIZE)) { - /* Get more input if necessary. Or end of packet. */ - if (desc_data_offset == desc_table[desc_current].len) + if (PREDICT_FALSE (cpu->rx_buffers_len == 0)) { - if (PREDICT_FALSE (desc_table[desc_current].flags & - VIRTQ_DESC_F_NEXT)) - { - desc_current = desc_table[desc_current].next; - desc_data_offset = 0; - } - else - { - goto out; - } + /* Cancel speculation */ + to_next--; + n_left_to_next++; + + /* + * Checking if there are some left buffers. + * If not, just rewind the used buffers and stop. + * Note: Scheduled copies are not cancelled. This is + * not an issue as they would still be valid. Useless, + * but valid. + */ + vhost_user_input_rewind_buffers (vm, cpu, b_head); + n_left = 0; + goto stop; } - /* Get more output if necessary. Or end of packet. */ - if (PREDICT_FALSE - (b_current->current_length == VLIB_BUFFER_DATA_SIZE)) - { - if (PREDICT_FALSE (cpu->rx_buffers_len == 0)) - { - /* Cancel speculation */ - to_next--; - n_left_to_next++; - - /* - * Checking if there are some left buffers. - * If not, just rewind the used buffers and stop. - * Note: Scheduled copies are not cancelled. This is - * not an issue as they would still be valid. Useless, - * but valid. - */ - vhost_user_input_rewind_buffers (vm, cpu, b_head); - n_left = 0; - goto stop; - } - - /* Get next output */ - cpu->rx_buffers_len--; - u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len]; - b_current->next_buffer = bi_next; - b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; - bi_current = bi_next; - b_current = vlib_get_buffer (vm, bi_current); - } - - /* Prepare a copy order executed later for the data */ - vhost_copy_t *cpy = &cpu->copy[copy_len]; - copy_len++; - u32 desc_data_l = - desc_table[desc_current].len - desc_data_offset; - cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; - cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; - cpy->dst = (uword) (vlib_buffer_get_current (b_current) + - b_current->current_length); - cpy->src = desc_table[desc_current].addr + desc_data_offset; - - desc_data_offset += cpy->len; - - b_current->current_length += cpy->len; - b_head->total_length_not_including_first_buffer += cpy->len; + /* Get next output */ + cpu->rx_buffers_len--; + u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len]; + b_current->next_buffer = bi_next; + b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; + bi_current = bi_next; + b_current = vlib_get_buffer (vm, bi_current); } - out: - CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD); + /* Prepare a copy order executed later for the data */ + vhost_copy_t *cpy = &cpu->copy[copy_len]; + copy_len++; + u32 desc_data_l = desc_table[desc_current].len - desc_data_offset; + cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; + cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; + cpy->dst = (uword) (vlib_buffer_get_current (b_current) + + b_current->current_length); + cpy->src = desc_table[desc_current].addr + desc_data_offset; - n_rx_bytes += b_head->total_length_not_including_first_buffer; - n_rx_packets++; + desc_data_offset += cpy->len; - b_head->total_length_not_including_first_buffer -= - b_head->current_length; + b_current->current_length += cpy->len; + b_head->total_length_not_including_first_buffer += cpy->len; + } - /* consume the descriptor and return it as used */ - txvq->last_avail_idx++; - txvq->last_used_idx++; + out: - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head); + n_rx_bytes += b_head->total_length_not_including_first_buffer; + n_rx_packets++; - vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index; - vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0; - b_head->error = 0; + b_head->total_length_not_including_first_buffer -= + b_head->current_length; - if (current_config_index != ~(u32) 0) - { - b_head->current_config_index = current_config_index; - vnet_buffer (b_head)->feature_arc_index = feature_arc_idx; - } + /* consume the descriptor and return it as used */ + last_avail_idx++; + last_used_idx++; - n_left--; + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head); - /* - * Although separating memory copies from virtio ring parsing - * is beneficial, we can offer to perform the copies from time - * to time in order to free some space in the ring. - */ - if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) - { - if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, - copy_len, &map_hint))) - { - vlib_error_count (vm, node->node_index, - VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); - } - copy_len = 0; + vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index; + vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0; + b_head->error = 0; + + if (current_config_index != ~(u32) 0) + { + b_head->current_config_index = current_config_index; + vnet_buffer (b_head)->feature_arc_index = feature_arc_idx; + } + + n_left--; - /* give buffers back to driver */ - CLIB_MEMORY_BARRIER (); - txvq->used->idx = txvq->last_used_idx; - vhost_user_log_dirty_ring (vui, txvq, idx); + /* + * Although separating memory copies from virtio ring parsing + * is beneficial, we can offer to perform the copies from time + * to time in order to free some space in the ring. + */ + if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) + { + if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, + copy_len, &map_hint))) + { + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); } + copy_len = 0; + + /* give buffers back to driver */ + CLIB_MEMORY_STORE_BARRIER (); + txvq->used->idx = last_used_idx; + vhost_user_log_dirty_ring (vui, txvq, idx); } - stop: - vlib_put_next_frame (vm, node, next_index, n_left_to_next); } +stop: + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + txvq->last_used_idx = last_used_idx; + txvq->last_avail_idx = last_avail_idx; /* Do the memory copies */ if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len, @@ -609,7 +609,7 @@ vhost_user_if_input (vlib_main_t * vm, } /* give buffers back to driver */ - CLIB_MEMORY_BARRIER (); + CLIB_MEMORY_STORE_BARRIER (); txvq->used->idx = txvq->last_used_idx; vhost_user_log_dirty_ring (vui, txvq, idx); @@ -631,6 +631,7 @@ vhost_user_if_input (vlib_main_t * vm, vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets); +done: return n_rx_packets; }