X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Favf%2Finput.c;h=6e437da2a168b872182923fd2eb39315e6dc7e1f;hb=f5a45680ee1c9c586ab2b037e3d71354789e4403;hp=0ccf7721835c8c9afe25b75b61c4635af65c0c41;hpb=171d6aceb039a7f0b0d67c837ff74359dae01ae4;p=vpp.git

diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c
index 0ccf7721835..6e437da2a16 100644
--- a/src/plugins/avf/input.c
+++ b/src/plugins/avf/input.c
@@ -19,7 +19,7 @@
 #include <vlib/unix/unix.h>
 #include <vlib/pci/pci.h>
 #include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
+#include <vnet/interface/rx_queue_funcs.h>
 
 #include <avf/avf.h>
@@ -125,8 +125,7 @@ avf_rxq_refill (vlib_main_t * vm, vlib_node_runtime_t * node, avf_rxq_t * rxq,
       n_alloc -= 8;
     }
 
-  CLIB_MEMORY_STORE_BARRIER ();
-  *(rxq->qrx_tail) = slot;
+  avf_tail_write (rxq->qrx_tail, slot);
 }
 
 
@@ -158,6 +157,37 @@ avf_rx_attach_tail (vlib_main_t * vm, vlib_buffer_t * bt, vlib_buffer_t * b,
   return tlnifb;
 }
 
+static_always_inline void
+avf_process_flow_offload (avf_device_t *ad, avf_per_thread_data_t *ptd,
+			  uword n_rx_packets)
+{
+  uword n;
+  avf_flow_lookup_entry_t *fle;
+
+  for (n = 0; n < n_rx_packets; n++)
+    {
+      if ((ptd->qw1s[n] & AVF_RXD_STATUS_FLM) == 0)
+	continue;
+
+      fle = pool_elt_at_index (ad->flow_lookup_entries, ptd->flow_ids[n]);
+
+      if (fle->next_index != (u16) ~0)
+	{
+	  ptd->next[n] = fle->next_index;
+	}
+
+      if (fle->flow_id != ~0)
+	{
+	  ptd->bufs[n]->flow_id = fle->flow_id;
+	}
+
+      if (fle->buffer_advance != ~0)
+	{
+	  vlib_buffer_advance (ptd->bufs[n], fle->buffer_advance);
+	}
+    }
+}
+
 static_always_inline uword
 avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node,
 		      avf_per_thread_data_t * ptd, u32 n_left,
@@ -200,17 +230,13 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[3], qw1[3], tail + 3);
 	}
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
-
       /* next */
       qw1 += 4;
       tail += 4;
       b += 4;
       n_left -= 4;
     }
+
   while (n_left)
     {
       vlib_buffer_copy_template (b[0], &bt);
@@ -220,8 +246,6 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (maybe_multiseg)
 	n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[0], qw1[0], tail + 0);
 
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
-
       /* next */
       qw1 += 1;
       tail += 1;
@@ -232,8 +256,9 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 static_always_inline uword
-avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
-			 vlib_frame_t * frame, avf_device_t * ad, u16 qid)
+avf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+			 vlib_frame_t *frame, avf_device_t *ad, u16 qid,
+			 int with_flows)
 {
   avf_main_t *am = &avf_main;
   vnet_main_t *vnm = vnet_get_main ();
@@ -253,8 +278,14 @@ avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   avf_rx_desc_t *d, *fd = rxq->descs;
 #ifdef CLIB_HAVE_VEC256
   u64x4 q1x4, or_q1x4 = { 0 };
+  u32x4 fdidx4;
   u64x4 dd_eop_mask4 = u64x4_splat (AVF_RXD_STATUS_DD | AVF_RXD_STATUS_EOP);
+#elif defined(CLIB_HAVE_VEC128)
+  u32x4 q1x4_lo, q1x4_hi, or_q1x4 = { 0 };
+  u32x4 fdidx4;
+  u32x4 dd_eop_mask4 = u32x4_splat (AVF_RXD_STATUS_DD | AVF_RXD_STATUS_EOP);
 #endif
+  int single_next = 1;
 
   /* is there anything on the ring */
   d = fd + next;
@@ -300,7 +331,40 @@ avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	goto one_by_one;
 
       or_q1x4 |= q1x4;
+
       u64x4_store_unaligned (q1x4, ptd->qw1s + n_rx_packets);
+#elif defined(CLIB_HAVE_VEC128)
+      if (n_rx_packets >= AVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+	goto one_by_one;
+
+      q1x4_lo =
+	u32x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+		      (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+      /* not all packets are ready or at least one of them is chained */
+      if (!u32x4_is_equal (q1x4_lo & dd_eop_mask4, dd_eop_mask4))
+	goto one_by_one;
+
+      q1x4_hi = u32x4_gather (
+	(void *) &d[0].qword[1] + 4, (void *) &d[1].qword[1] + 4,
+	(void *) &d[2].qword[1] + 4, (void *) &d[3].qword[1] + 4);
+
+      or_q1x4 |= q1x4_lo;
+      ptd->qw1s[n_rx_packets + 0] = (u64) q1x4_hi[0] << 32 | (u64) q1x4_lo[0];
+      ptd->qw1s[n_rx_packets + 1] = (u64) q1x4_hi[1] << 32 | (u64) q1x4_lo[1];
+      ptd->qw1s[n_rx_packets + 2] = (u64) q1x4_hi[2] << 32 | (u64) q1x4_lo[2];
+      ptd->qw1s[n_rx_packets + 3] = (u64) q1x4_hi[3] << 32 | (u64) q1x4_lo[3];
+#endif
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+
+      if (with_flows)
+	{
+	  fdidx4 = u32x4_gather (
+	    (void *) &d[0].fdid_flex_hi, (void *) &d[1].fdid_flex_hi,
+	    (void *) &d[2].fdid_flex_hi, (void *) &d[3].fdid_flex_hi);
+	  u32x4_store_unaligned (fdidx4, ptd->flow_ids + n_rx_packets);
+	}
+
       vlib_buffer_copy_indices (bi, rxq->bufs + next, 4);
 
       /* next */
@@ -345,6 +409,10 @@ avf_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	}
 
       or_qw1 |= ptd->qw1s[n_rx_packets] = d[0].qword[1];
+      if (PREDICT_FALSE (with_flows))
+	{
+	  ptd->flow_ids[n_rx_packets] = d[0].fdid_flex_hi;
+	}
 
       /* next */
       next = (next + 1) & mask;
@@ -360,7 +428,7 @@ no_more_desc:
   rxq->next = next;
   rxq->n_enqueued -= n_rx_packets + n_tail_desc;
 
-#ifdef CLIB_HAVE_VEC256
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
   or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
 #endif
 
@@ -376,55 +444,91 @@ no_more_desc:
   else
     n_rx_bytes = avf_process_rx_burst (vm, node, ptd, n_rx_packets, 0);
 
+  /* the MARKed packets may have different next nodes */
+  if (PREDICT_FALSE (with_flows && (or_qw1 & AVF_RXD_STATUS_FLM)))
+    {
+      u32 n;
+      single_next = 0;
+      for (n = 0; n < n_rx_packets; n++)
+	ptd->next[n] = next_index;
+
+      avf_process_flow_offload (ad, ptd, n_rx_packets);
+    }
+
   /* packet trace if enabled */
   if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
     {
-      u32 n_left = n_rx_packets, i = 0, j;
-      bi = to_next;
+      u32 n_left = n_rx_packets;
+      u32 i, j;
+      u16 *next_indices = ptd->next;
 
+      i = 0;
       while (n_trace && n_left)
 	{
-	  vlib_buffer_t *b;
-	  avf_input_trace_t *tr;
-	  b = vlib_get_buffer (vm, bi[0]);
-	  vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 0);
-	  tr = vlib_add_trace (vm, node, b, sizeof (*tr));
-	  tr->next_index = next_index;
-	  tr->qid = qid;
-	  tr->hw_if_index = ad->hw_if_index;
-	  tr->qw1s[0] = ptd->qw1s[i];
-	  for (j = 1; j < AVF_RX_MAX_DESC_IN_CHAIN; j++)
-	    tr->qw1s[j] = ptd->tails[i].qw1s[j - 1];
+	  vlib_buffer_t *b = ptd->bufs[i];
+	  if (PREDICT_FALSE (single_next == 0))
+	    next_index = next_indices[0];
+
+	  if (PREDICT_TRUE
+	      (vlib_trace_buffer
+	       (vm, node, next_index, b, /* follow_chain */ 0)))
+	    {
+	      avf_input_trace_t *tr =
+		vlib_add_trace (vm, node, b, sizeof (*tr));
+	      tr->next_index = next_index;
+	      tr->qid = qid;
+	      tr->hw_if_index = ad->hw_if_index;
+	      tr->qw1s[0] = ptd->qw1s[i];
+	      tr->flow_id =
+		(tr->qw1s[0] & AVF_RXD_STATUS_FLM) ? ptd->flow_ids[i] : 0;
+	      for (j = 1; j < AVF_RX_MAX_DESC_IN_CHAIN; j++)
+		tr->qw1s[j] = ptd->tails[i].qw1s[j - 1];
+
+	      n_trace--;
+	    }
 
 	  /* next */
-	  n_trace--;
 	  n_left--;
-	  bi++;
 	  i++;
+	  next_indices++;
 	}
       vlib_set_trace_count (vm, node, n_trace);
     }
 
-  if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+  /* enqueue the packets to the next nodes */
+  if (PREDICT_FALSE (with_flows && (or_qw1 & AVF_RXD_STATUS_FLM)))
     {
-      vlib_next_frame_t *nf;
-      vlib_frame_t *f;
-      ethernet_input_frame_t *ef;
-      nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
-      f = vlib_get_frame (vm, nf->frame);
-      f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
-
-      ef = vlib_frame_scalar_args (f);
-      ef->sw_if_index = ad->sw_if_index;
-      ef->hw_if_index = ad->hw_if_index;
-
-      if ((or_qw1 & AVF_RXD_ERROR_IPE) == 0)
-	f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
-      vlib_frame_no_append (f);
+      /* release next node's frame vector, in this case we use
	 vlib_buffer_enqueue_to_next to place the packets
+       */
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+      /* enqueue buffers to the next node */
+      vlib_buffer_enqueue_to_next (vm, node, to_next, ptd->next, n_rx_packets);
     }
+  else
+    {
+      if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+	{
+	  vlib_next_frame_t *nf;
+	  vlib_frame_t *f;
+	  ethernet_input_frame_t *ef;
+	  nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+	  f = vlib_get_frame (vm, nf->frame);
+	  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+	  ef = vlib_frame_scalar_args (f);
+	  ef->sw_if_index = ad->sw_if_index;
+	  ef->hw_if_index = ad->hw_if_index;
+
+	  if ((or_qw1 & AVF_RXD_ERROR_IPE) == 0)
+	    f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+	  vlib_frame_no_append (f);
+	}
 
-  n_left_to_next -= n_rx_packets;
-  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      n_left_to_next -= n_rx_packets;
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
 
   vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
				   + VNET_INTERFACE_COUNTER_RX, thr_idx,
@@ -444,17 +548,23 @@ VLIB_NODE_FN (avf_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
			       vlib_frame_t * frame)
 {
   u32 n_rx = 0;
-  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
-  vnet_device_and_queue_t *dq;
-
-  foreach_device_and_queue (dq, rt->devices_and_queues)
-  {
-    avf_device_t *ad;
-    ad = avf_get_device (dq->dev_instance);
-    if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0)
-      continue;
-    n_rx += avf_device_input_inline (vm, node, frame, ad, dq->queue_id);
-  }
+  vnet_hw_if_rxq_poll_vector_t *pv;
+
+  pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+
+  for (int i = 0; i < vec_len (pv); i++)
+    {
+      avf_device_t *ad = avf_get_device (pv[i].dev_instance);
+      if ((ad->flags & AVF_DEVICE_F_ADMIN_UP) == 0)
+	continue;
+      if (PREDICT_FALSE (ad->flags & AVF_DEVICE_F_RX_FLOW_OFFLOAD))
+	n_rx +=
+	  avf_device_input_inline (vm, node, frame, ad, pv[i].queue_id, 1);
+      else
+	n_rx +=
+	  avf_device_input_inline (vm, node, frame, ad, pv[i].queue_id, 0);
+    }
+
   return n_rx;
 }