- while (n_left_from >= 8 && n_left_to_next >= 4)
- {
- u32 next0, next1, next2, next3;
- u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
-
- /* Prefetch next iteration. */
- {
-
- /* Prefetch the buffer headers and packet data for the next loop iteration (b[4]..b[7]) */
- vlib_prefetch_buffer_header (b[4], LOAD);
- vlib_prefetch_buffer_header (b[5], LOAD);
- vlib_prefetch_buffer_header (b[6], LOAD);
- vlib_prefetch_buffer_header (b[7], LOAD);
-
- CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, STORE);
- CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, STORE);
- CLIB_PREFETCH (b[6]->data, CLIB_CACHE_LINE_BYTES, STORE);
- CLIB_PREFETCH (b[7]->data, CLIB_CACHE_LINE_BYTES, STORE);
-
- /*
- * Don't bother prefetching the bridge-domain config (which
- * depends on the input config above). Only a small number of
- * bridge domains are expected. Plus the structure is small
- * and several fit in a cache line.
- */
- }
-
- /* speculatively enqueue b0, b1, b2 and b3 to the current next frame */
- /* bi is "buffer index", b is pointer to the buffer */
-
- if (do_trace)
- {
- /* RX interface handles */
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
- sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
- sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
-
- if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
- {
- ethernet_header_t *h0 = vlib_buffer_get_current (b[0]);
- l2input_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->sw_if_index = sw_if_index0;
- clib_memcpy_fast (t->dst_and_src, h0->dst_address,
- sizeof (h0->dst_address) +
- sizeof (h0->src_address));
- }
- if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
- {
- ethernet_header_t *h1 = vlib_buffer_get_current (b[1]);
- l2input_trace_t *t =
- vlib_add_trace (vm, node, b[1], sizeof (*t));
- t->sw_if_index = sw_if_index1;
- clib_memcpy_fast (t->dst_and_src, h1->dst_address,
- sizeof (h1->dst_address) +
- sizeof (h1->src_address));
- }
- if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
- {
- ethernet_header_t *h2 = vlib_buffer_get_current (b[2]);
- l2input_trace_t *t =
- vlib_add_trace (vm, node, b[2], sizeof (*t));
- t->sw_if_index = sw_if_index2;
- clib_memcpy_fast (t->dst_and_src, h2->dst_address,
- sizeof (h2->dst_address) +
- sizeof (h2->src_address));
- }
- if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
- {
- ethernet_header_t *h3 = vlib_buffer_get_current (b[3]);
- l2input_trace_t *t =
- vlib_add_trace (vm, node, b[3], sizeof (*t));
- t->sw_if_index = sw_if_index3;
- clib_memcpy_fast (t->dst_and_src, h3->dst_address,
- sizeof (h3->dst_address) +
- sizeof (h3->src_address));
- }
- }
-
- classify_and_dispatch (msm, b[0], &next0);
- classify_and_dispatch (msm, b[1], &next1);
- // Performance is better when the clib_memcpy_fast is placed here.
- clib_memcpy_fast (to_next, from, sizeof (from[0]) * 4);
- to_next += 4;
- classify_and_dispatch (msm, b[2], &next2);
- classify_and_dispatch (msm, b[3], &next3);
- b += 4;
- n_left_from -= 4;
- n_left_to_next -= 4;
-
- /* verify speculative enqueues, maybe switch current next frame */
- /* if next0==next1==next2==next3==next_index then nothing special needs to be done */
- vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
- to_next, n_left_to_next,
- from[0], from[1], from[2], from[3],
- next0, next1, next2, next3);
- from += 4;
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 next0;
- u32 sw_if_index0;
-
- /* speculatively enqueue b0 to the current next frame */
-
- if (do_trace && PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- ethernet_header_t *h0 = vlib_buffer_get_current (b[0]);
- l2input_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
- t->sw_if_index = sw_if_index0;
- clib_memcpy_fast (t->dst_and_src, h0->dst_address,
- sizeof (h0->dst_address) +
- sizeof (h0->src_address));
- }
-
- classify_and_dispatch (msm, b[0], &next0);
- b += 1;
- to_next[0] = from[0];
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- from[0], next0);
- from += 1;
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);