This saves about 20 clock cycles per packet in both code paths.
Type: improvement
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Change-Id: Ib559c74bf8168e3ddd764d51b7e5bcd2a557f591
+ b++;
+
+ /* Prefetch next iteration. */
+ if (PREDICT_TRUE (n_left_from >= 2))
+ {
+ vlib_buffer_t *p2;
+
+ p2 = *b;
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
+ b++;
+
+ /* Prefetch next iteration. */
+ if (PREDICT_TRUE (n_left_from >= 2))
+ {
+ vlib_buffer_t *p2;
+
+ p2 = *b;
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
next[0] = vnet_buffer2 (b0)->nat.arc_next;
vnet_buffer (b0)->snat.flags = 0;
next[0] = vnet_buffer2 (b0)->nat.arc_next;
vnet_buffer (b0)->snat.flags = 0;
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,