For vxlan_encap, code will touch memory area before the field "data"
in struct vlib_buffer_t, however so far it is not prefetched in cache
yet for this graph node.
After applying the patch, 2~3 cycles per pkt for vxlan4_encap can be
saved on Haswell. It will bring a lot of benefits on DVN platform too.
Change-Id: I26d8c57fb3d2415726be5367117d73eb715e35ad
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
vlib_prefetch_buffer_header (p2, LOAD);
vlib_prefetch_buffer_header (p3, LOAD);
vlib_prefetch_buffer_header (p2, LOAD);
vlib_prefetch_buffer_header (p3, LOAD);
- CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p2->data - CLIB_CACHE_LINE_BYTES, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data - CLIB_CACHE_LINE_BYTES, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
u32 bi0 = to_next[0] = from[0];
}
u32 bi0 = to_next[0] = from[0];