From: Damjan Marion Date: Wed, 11 May 2016 21:07:18 +0000 (+0200) Subject: Add support for multiple microarchitectures in single binary X-Git-Tag: v16.09-rc1~386 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=1c80e831b728ab378949714d5059a0b5b1822a0a;p=vpp.git Add support for multiple microarchitectures in single binary * compiler -march= parameter is changed from native to corei7 so code is always generated with instructions which are available on the Nehalem microarchitecture (up to SSE4.2) * compiler -mtune= parameter is added so code is optimized for corei7-avx, which corresponds to the Sandy Bridge microarchitecture * set of macros is added which allows run-time detection of available cpu instructions (e.g. clib_cpu_supports_avx()) * set of macros is added which allows us to clone graph node functions, where each cloned function is optimized for a different microarchitecture Those macros are using the following attributes: __attribute__((flatten)) __attribute__((target("arch=core-avx2"))) E.g., if applied to foo_node_fn(), the macro will generate cloned functions foo_node_fn_avx2() and foo_node_fn_avx512() (future). It will also generate function void * foo_node_fn_multiarch_select() which detects the available instruction set and returns a pointer to the best matching function clone. 
Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0 Signed-off-by: Damjan Marion --- diff --git a/build-data/packages/dpdk.mk b/build-data/packages/dpdk.mk index 2bfc4b49d32..0a4c1610026 100644 --- a/build-data/packages/dpdk.mk +++ b/build-data/packages/dpdk.mk @@ -4,6 +4,11 @@ ifeq ($(DPDK_MARCH),) DPDK_MARCH="native" endif +DPDK_TUNE = $(strip $($(PLATFORM)_mtune)) +ifeq ($(DPDK_TUNE),) + DPDK_TUNE="generic" +endif + ifneq (,$(findstring debug,$(TAG))) DPDK_DEBUG=y else @@ -14,6 +19,7 @@ DPDK_MAKE_ARGS = -C $(call find_source_fn,$(PACKAGE_SOURCE)) \ DPDK_BUILD_DIR=$(PACKAGE_BUILD_DIR) \ DPDK_INSTALL_DIR=$(PACKAGE_INSTALL_DIR) \ DPDK_MARCH=$(DPDK_MARCH) \ + DPDK_TUNE=$(DPDK_TUNE) \ DPDK_DEBUG=$(DPDK_DEBUG) diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index c381be19b42..ec0d874db4b 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -13,6 +13,9 @@ # vector packet processor vpp_arch = native +vpp_march = corei7 # Nehalem Instruction set +vpp_mtune = corei7-avx # Optimize for Sandy Bridge +vpp_dpdk_arch = corei7 vpp_native_tools = vppapigen vpp_uses_dpdk = yes @@ -40,9 +43,9 @@ vpp_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ vpp_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ -fstack-protector-all -fPIC -Werror -vpp_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \ +vpp_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ -fstack-protector -fPIC -Werror -vpp_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \ +vpp_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ -fstack-protector -fPIC -Werror vpp_gcov_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -march=$(MARCH) \ diff --git a/build-data/platforms/vpp_lite.mk b/build-data/platforms/vpp_lite.mk index 4183700e681..d35d2347e69 100644 --- a/build-data/platforms/vpp_lite.mk +++ b/build-data/platforms/vpp_lite.mk @@ -13,6 +13,8 @@ # vector packet processor 
vpp_lite_arch = native +vpp_lite_march = corei7 # Nehalem Instruction set +vpp_lite_mtune = corei7-avx # Optimize for Sandy Bridge vpp_lite_native_tools = vppapigen vpp_lite_uses_dpdk = no @@ -30,7 +32,7 @@ vpp_lite_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARC vpp_lite_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ -fstack-protector-all -fPIC -Werror -vpp_lite_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \ +vpp_lite_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ -fstack-protector -fPIC -Werror -vpp_lite_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) \ +vpp_lite_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ -fstack-protector -fPIC -Werror diff --git a/build-root/Makefile b/build-root/Makefile index a60cbf0a22e..97fb43d49ee 100644 --- a/build-root/Makefile +++ b/build-root/Makefile @@ -220,6 +220,11 @@ else endif export MARCH +MTUNE = $(strip $($(PLATFORM)_mtune)) +ifeq ($(MTUNE),) + MTUNE = generic +endif + ###################################################################### # Generic build stuff ###################################################################### diff --git a/dpdk/Makefile b/dpdk/Makefile index 165058d9530..b7e1e097212 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -19,6 +19,7 @@ DPDK_INSTALL_DIR ?= $(CURDIR)/_install DPDK_PKTMBUF_HEADROOM ?= 128 DPDK_DOWNLOAD_DIR ?= $(HOME)/Downloads DPDK_MARCH ?= native +DPDK_TUNE ?= generic DPDK_DEBUG ?= n B := $(DPDK_BUILD_DIR) @@ -51,7 +52,7 @@ DPDK_CPU_LDFLAGS := DPDK_EXTRA_LDFLAGS := -g ifeq ($(DPDK_DEBUG),n) -DPDK_EXTRA_CFLAGS := -g +DPDK_EXTRA_CFLAGS := -g -mtune=$(DPDK_TUNE) else DPDK_EXTRA_CFLAGS := -g -O0 endif diff --git a/vlib/vlib/node.h b/vlib/vlib/node.h index 2caede6e411..9b33a0a3ae0 100644 --- a/vlib/vlib/node.h +++ b/vlib/vlib/node.h @@ -40,6 +40,7 @@ #ifndef included_vlib_node_h #define included_vlib_node_h +#include #include #include #include /* for 
vlib_trace_filter_t */ @@ -149,6 +150,32 @@ static void __vlib_add_node_registration_##x (void) \ } \ __VA_ARGS__ vlib_node_registration_t x +#if CLIB_DEBUG > 0 +#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) +#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) +#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) +#else +#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( struct vlib_main_t * vm, \ + struct vlib_node_runtime_t * node, \ + struct vlib_frame_t * frame) \ + { return fn (vm, node, frame); } + +#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ + foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn) + +#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) \ + VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ + CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ + static void __attribute__((__constructor__)) \ + __vlib_node_function_multiarch_select_##node (void) \ + { node.function = fn ## _multiarch_select(); } +#endif + always_inline vlib_node_registration_t * vlib_node_next_registered (vlib_node_registration_t * c) { diff --git a/vnet/vnet/classify/ip_classify.c b/vnet/vnet/classify/ip_classify.c index 8152f6ce353..75e80ad67ab 100644 --- a/vnet/vnet/classify/ip_classify.c +++ b/vnet/vnet/classify/ip_classify.c @@ -329,6 +329,8 @@ VLIB_REGISTER_NODE (ip4_classify_node) = { .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_classify_node, ip4_classify) + static uword ip6_classify (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -350,6 +352,8 @@ VLIB_REGISTER_NODE (ip6_classify_node) = { .next_nodes = IP6_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_classify_node, ip6_classify) + static clib_error_t * ip_classify_init (vlib_main_t * vm) { diff --git a/vnet/vnet/cop/ip4_whitelist.c b/vnet/vnet/cop/ip4_whitelist.c index 6178e891ffd..5578558c4b1 100644 --- a/vnet/vnet/cop/ip4_whitelist.c +++ 
b/vnet/vnet/cop/ip4_whitelist.c @@ -348,6 +348,8 @@ VLIB_REGISTER_NODE (ip4_cop_whitelist_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_cop_whitelist_node, ip4_cop_whitelist_node_fn) + static clib_error_t * ip4_whitelist_init (vlib_main_t * vm) { diff --git a/vnet/vnet/cop/ip6_whitelist.c b/vnet/vnet/cop/ip6_whitelist.c index dc6a1ee5d84..4a8f33fb727 100644 --- a/vnet/vnet/cop/ip6_whitelist.c +++ b/vnet/vnet/cop/ip6_whitelist.c @@ -286,6 +286,8 @@ VLIB_REGISTER_NODE (ip6_cop_whitelist_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_cop_whitelist_node, ip6_cop_whitelist_node_fn) + static clib_error_t * ip6_whitelist_init (vlib_main_t * vm) { diff --git a/vnet/vnet/cop/node1.c b/vnet/vnet/cop/node1.c index 3ee7006acfa..b448b531039 100644 --- a/vnet/vnet/cop/node1.c +++ b/vnet/vnet/cop/node1.c @@ -282,6 +282,8 @@ VLIB_REGISTER_NODE (cop_input_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (cop_input_node, cop_input_node_fn) + #define foreach_cop_stub \ _(default-cop-whitelist, default_cop_whitelist) diff --git a/vnet/vnet/devices/af_packet/device.c b/vnet/vnet/devices/af_packet/device.c index f0e91487e0d..0671d9e247d 100644 --- a/vnet/vnet/devices/af_packet/device.c +++ b/vnet/vnet/devices/af_packet/device.c @@ -204,3 +204,6 @@ VNET_DEVICE_CLASS (af_packet_device_class) = { .subif_add_del_function = af_packet_subif_add_del_function, .no_flatten_output_chains = 1, }; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (af_packet_device_class, + af_packet_interface_tx) diff --git a/vnet/vnet/devices/af_packet/node.c b/vnet/vnet/devices/af_packet/node.c index b622a221b84..0c608ea41bf 100644 --- a/vnet/vnet/devices/af_packet/node.c +++ b/vnet/vnet/devices/af_packet/node.c @@ -267,7 +267,6 @@ af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return n_rx_packets; } - VLIB_REGISTER_NODE (af_packet_input_node) = { .function = af_packet_input_fn, .name = "af-packet-input", @@ -283,3 +282,6 @@ VLIB_REGISTER_NODE (af_packet_input_node) = { 
[AF_PACKET_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", }, }; + +VLIB_NODE_FUNCTION_MULTIARCH (af_packet_input_node, af_packet_input_fn) + diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c index 9ec19867929..9063cad890a 100644 --- a/vnet/vnet/devices/dpdk/cli.c +++ b/vnet/vnet/devices/dpdk/cli.c @@ -759,6 +759,7 @@ set_efd (vlib_main_t *vm, unformat_input_t *input, dpdk_main_t * dm = &dpdk_main; vlib_thread_main_t * tm = vlib_get_thread_main(); clib_error_t * error = NULL; + vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); if (unformat(input, "enable")) { if (unformat(input, "dpdk")) { @@ -844,6 +845,13 @@ set_efd (vlib_main_t *vm, unformat_input_t *input, format_unformat_error, input); } + if (dm->efd.enabled) + rt->function = dpdk_input_efd_multiarch_select(); + else if (dm->use_rss) + rt->function = dpdk_input_rss_multiarch_select(); + else + rt->function = dpdk_input_multiarch_select(); + return error; } diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c index ab85dfb48a9..a38c8d1915f 100644 --- a/vnet/vnet/devices/dpdk/device.c +++ b/vnet/vnet/devices/dpdk/device.c @@ -1184,6 +1184,9 @@ VNET_DEVICE_CLASS (dpdk_device_class) = { .name_renumber = dpdk_device_renumber, }; +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, + dpdk_interface_tx) + void dpdk_set_flowcontrol_callback (vlib_main_t *vm, dpdk_flowcontrol_callback_t callback) { diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h index 019d83f797a..525cd8d1d6b 100644 --- a/vnet/vnet/devices/dpdk/dpdk.h +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -584,8 +584,9 @@ dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi); i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi); -uword -dpdk_input_rss (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f); +void * dpdk_input_multiarch_select(); +void * dpdk_input_rss_multiarch_select(); +void * dpdk_input_efd_multiarch_select(); clib_error_t* 
dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest); diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index 63fa4c07e5f..8ee59ff8327 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -804,6 +804,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_error_t * error = 0; dpdk_main_t * dm = &dpdk_main; vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); u8 * s, * tmp = 0; u8 * pci_dev_id = 0; u8 * rte_cmd = 0, * ethname = 0; @@ -1251,10 +1252,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } if (dm->use_rss) - { - vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); - rt->function = dpdk_input_rss; - } + rt->function = dpdk_input_rss_multiarch_select(); + else + rt->function = dpdk_input_multiarch_select(); done: return error; } diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index a7590a5c566..ca94511b411 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -251,6 +251,8 @@ VLIB_REGISTER_NODE (handoff_dispatch_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn) + clib_error_t *handoff_dispatch_init (vlib_main_t *vm) { handoff_dispatch_main_t * mp = &handoff_dispatch_main; @@ -488,7 +490,8 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, dpdk_device_t * xd, vlib_node_runtime_t * node, u32 cpu_index, - u16 queue_id) + u16 queue_id, + int use_efd) { u32 n_buffers; u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT; @@ -510,7 +513,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm, if (n_buffers == 0) { /* check if EFD (dpdk) is enabled */ - if (PREDICT_FALSE(dm->efd.enabled)) + if (PREDICT_FALSE(use_efd && dm->efd.enabled)) { /* reset a few stats */ xd->efd_agent.last_poll_time = 0; @@ -546,7 +549,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * 
dm, /* Check for congestion if EFD (Early-Fast-Discard) is enabled * in any mode (e.g. dpdk, monitor, or drop_all) */ - if (PREDICT_FALSE(dm->efd.enabled)) + if (PREDICT_FALSE(use_efd && dm->efd.enabled)) { /* update EFD counters */ dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled); @@ -793,7 +796,7 @@ dpdk_input (vlib_main_t * vm, { xd = vec_elt_at_index(dm->devices, dq->device); ASSERT(dq->queue_id == 0); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0); } VIRL_SPEED_LIMIT() @@ -818,7 +821,7 @@ dpdk_input_rss (vlib_main_t * vm, vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0); } VIRL_SPEED_LIMIT() @@ -826,6 +829,32 @@ dpdk_input_rss (vlib_main_t * vm, return n_rx_packets; } +uword +dpdk_input_efd (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + dpdk_main_t * dm = &dpdk_main; + dpdk_device_t * xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t * dq; + u32 cpu_index = os_get_cpu_number(); + + /* + * Poll all devices on this cpu for input/interrupts. 
+ */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1); + } + + VIRL_SPEED_LIMIT() + + return n_rx_packets; +} + + VLIB_REGISTER_NODE (dpdk_input_node) = { .function = dpdk_input, .type = VLIB_NODE_TYPE_INPUT, @@ -850,6 +879,17 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { }, }; + +/* handle dpdk_input_rss alternative function */ +VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input) +VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss) +VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd) + +/* this macro defines dpdk_input_rss_multiarch_select() */ +CLIB_MULTIARCH_SELECT_FN(dpdk_input); +CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss); +CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd); + /* * Override the next nodes for the dpdk input nodes. * Must be invoked prior to VLIB_INIT_FUNCTION calls. diff --git a/vnet/vnet/devices/netmap/device.c b/vnet/vnet/devices/netmap/device.c index f04e0672c90..a966ffef668 100644 --- a/vnet/vnet/devices/netmap/device.c +++ b/vnet/vnet/devices/netmap/device.c @@ -232,3 +232,6 @@ VNET_DEVICE_CLASS (netmap_device_class) = { .subif_add_del_function = netmap_subif_add_del_function, .no_flatten_output_chains = 1, }; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH(netmap_device_class, + netmap_interface_tx) diff --git a/vnet/vnet/devices/netmap/node.c b/vnet/vnet/devices/netmap/node.c index 3986c7e4762..f4c39e6259f 100644 --- a/vnet/vnet/devices/netmap/node.c +++ b/vnet/vnet/devices/netmap/node.c @@ -270,7 +270,6 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return n_rx_packets; } - VLIB_REGISTER_NODE (netmap_input_node) = { .function = netmap_input_fn, .name = "netmap-input", @@ -287,3 +286,5 @@ VLIB_REGISTER_NODE (netmap_input_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (netmap_input_node, netmap_input_fn) + diff --git a/vnet/vnet/devices/ssvm/node.c b/vnet/vnet/devices/ssvm/node.c index 84625696b6b..57b5fd22b5b 100644 
--- a/vnet/vnet/devices/ssvm/node.c +++ b/vnet/vnet/devices/ssvm/node.c @@ -334,3 +334,5 @@ VLIB_REGISTER_NODE (ssvm_eth_input_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ssvm_eth_input_node, ssvm_eth_input_node_fn) + diff --git a/vnet/vnet/devices/ssvm/ssvm_eth.c b/vnet/vnet/devices/ssvm/ssvm_eth.c index 49f2d5f8e4b..7c65e2121f5 100644 --- a/vnet/vnet/devices/ssvm/ssvm_eth.c +++ b/vnet/vnet/devices/ssvm/ssvm_eth.c @@ -474,3 +474,6 @@ VNET_DEVICE_CLASS (ssvm_eth_device_class) = { .rx_redirect_to_node = ssvm_eth_set_interface_next_node, .no_flatten_output_chains = 1, }; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ssvm_eth_device_class, + ssvm_eth_interface_tx) diff --git a/vnet/vnet/devices/virtio/vhost-user.c b/vnet/vnet/devices/virtio/vhost-user.c index 5902c42b369..ef4993f94f7 100644 --- a/vnet/vnet/devices/virtio/vhost-user.c +++ b/vnet/vnet/devices/virtio/vhost-user.c @@ -1144,6 +1144,8 @@ VLIB_REGISTER_NODE (vhost_user_input_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input) + static uword vhost_user_intfc_tx (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1373,6 +1375,9 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { .no_flatten_output_chains = 1, }; +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class, + vhost_user_intfc_tx) + static uword vhost_user_process (vlib_main_t * vm, vlib_node_runtime_t * rt, diff --git a/vnet/vnet/ethernet/node.c b/vnet/vnet/ethernet/node.c index e8902fd7410..226a66e9a43 100644 --- a/vnet/vnet/ethernet/node.c +++ b/vnet/vnet/ethernet/node.c @@ -945,6 +945,8 @@ VLIB_REGISTER_NODE (ethernet_input_node) = { .unformat_buffer = unformat_ethernet_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_node, ethernet_input) + VLIB_REGISTER_NODE (ethernet_input_type_node,static) = { .function = ethernet_input_type, .name = "ethernet-input-type", @@ -959,6 +961,8 @@ VLIB_REGISTER_NODE (ethernet_input_type_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_type_node, 
ethernet_input_type) + VLIB_REGISTER_NODE (ethernet_input_not_l2_node,static) = { .function = ethernet_input_not_l2, .name = "ethernet-input-not-l2", @@ -973,6 +977,8 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_not_l2_node, ethernet_input_not_l2) + void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 enable) diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c index 075bd6fd46e..6d375159818 100644 --- a/vnet/vnet/gre/gre.c +++ b/vnet/vnet/gre/gre.c @@ -454,6 +454,9 @@ VNET_DEVICE_CLASS (gre_device_class) = { #endif }; +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class, + gre_interface_tx) + VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { .name = "GRE", diff --git a/vnet/vnet/gre/node.c b/vnet/vnet/gre/node.c index a91f7e9eed7..e33b2b05ecd 100644 --- a/vnet/vnet/gre/node.c +++ b/vnet/vnet/gre/node.c @@ -434,6 +434,8 @@ VLIB_REGISTER_NODE (gre_input_node) = { .unformat_buffer = unformat_gre_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (gre_input_node, gre_input) + void gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol, diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h index 2829a0ccfb3..30dcf276393 100644 --- a/vnet/vnet/interface.h +++ b/vnet/vnet/interface.h @@ -163,6 +163,32 @@ static void __vnet_add_device_class_registration_##x (void) \ } \ __VA_ARGS__ vnet_device_class_t x +#define VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( vlib_main_t * vm, \ + vlib_node_runtime_t * node, \ + vlib_frame_t * frame) \ + { return fn (vm, node, frame); } + +#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ + foreach_march_variant(VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE, fn) + +#if CLIB_DEBUG > 0 +#define VLIB_MULTIARCH_CLONE_AND_SELECT_FN(fn,...) 
+#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) +#else +#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) \ + VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ + CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ + static void __attribute__((__constructor__)) \ + __vlib_device_tx_function_multiarch_select_##dev (void) \ + { dev.tx_function = fn ## _multiarch_select(); } +#endif + + /* Layer-2 (e.g. Ethernet) interface class. */ typedef struct _vnet_hw_interface_class { /* Index into main vector. */ diff --git a/vnet/vnet/interface_output.c b/vnet/vnet/interface_output.c index 04c1b7f59f8..9f9fb707297 100644 --- a/vnet/vnet/interface_output.c +++ b/vnet/vnet/interface_output.c @@ -1140,6 +1140,8 @@ VLIB_REGISTER_NODE (drop_buffers,static) = { .validate_frame = validate_error_frame, }; +VLIB_NODE_FUNCTION_MULTIARCH (drop_buffers, process_drop) + VLIB_REGISTER_NODE (punt_buffers,static) = { .function = process_punt, .flags = (VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH @@ -1150,12 +1152,16 @@ VLIB_REGISTER_NODE (punt_buffers,static) = { .validate_frame = validate_error_frame, }; +VLIB_NODE_FUNCTION_MULTIARCH (punt_buffers, process_punt) + VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = { .function = vnet_per_buffer_interface_output, .name = "interface-output", .vector_size = sizeof (u32), }; +VLIB_NODE_FUNCTION_MULTIARCH (vnet_per_buffer_interface_output_node, vnet_per_buffer_interface_output) + clib_error_t * vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm, u32 hw_if_index, diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index a84b83bc225..ae2f9eea12a 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -1354,6 +1354,8 @@ VLIB_REGISTER_NODE (ip4_lookup_node) = { .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup) + static uword ip4_indirect (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1373,6 +1375,8 @@ VLIB_REGISTER_NODE 
(ip4_indirect_node) = { .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect) + /* Global IP4 main. */ ip4_main_t ip4_main; @@ -1612,6 +1616,8 @@ VLIB_REGISTER_NODE (ip4_drop_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop) + VLIB_REGISTER_NODE (ip4_punt_node,static) = { .function = ip4_punt, .name = "ip4-punt", @@ -1625,6 +1631,8 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt) + VLIB_REGISTER_NODE (ip4_miss_node,static) = { .function = ip4_miss, .name = "ip4-miss", @@ -1638,6 +1646,8 @@ VLIB_REGISTER_NODE (ip4_miss_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss) + /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, @@ -2076,6 +2086,8 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local) + void ip4_register_protocol (u32 protocol, u32 node_index) { vlib_main_t * vm = vlib_get_main(); @@ -2743,6 +2755,8 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit) + VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = { .function = ip4_rewrite_local, .name = "ip4-rewrite-local", @@ -2759,6 +2773,8 @@ VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local) + static clib_error_t * add_del_interface_table (vlib_main_t * vm, unformat_input_t * input, @@ -3032,6 +3048,8 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = { .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast) + VLIB_REGISTER_NODE (ip4_multicast_node,static) = { .function = ip4_drop, .name = "ip4-multicast", diff --git a/vnet/vnet/ip/ip4_hop_by_hop.c b/vnet/vnet/ip/ip4_hop_by_hop.c index 
ae46040a5d3..177feb7430e 100644 --- a/vnet/vnet/ip/ip4_hop_by_hop.c +++ b/vnet/vnet/ip/ip4_hop_by_hop.c @@ -233,6 +233,8 @@ VLIB_REGISTER_NODE (ip4_hop_by_hop_node) = { .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_hop_by_hop_node, ip4_hop_by_hop_node_fn) + VLIB_REGISTER_NODE (ip4_add_hop_by_hop_node) = { .function = ip4_hop_by_hop_node_fn, .name = "ip4-add-hop-by-hop", diff --git a/vnet/vnet/ip/ip4_input.c b/vnet/vnet/ip/ip4_input.c index f31df0f6eb9..606342501a0 100644 --- a/vnet/vnet/ip/ip4_input.c +++ b/vnet/vnet/ip/ip4_input.c @@ -371,6 +371,8 @@ VLIB_REGISTER_NODE (ip4_input_node) = { .format_trace = format_ip4_input_trace, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input) + VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { .function = ip4_input_no_checksum, .name = "ip4-input-no-checksum", @@ -389,6 +391,8 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { .format_trace = format_ip4_input_trace, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node, ip4_input_no_checksum) + static clib_error_t * ip4_init (vlib_main_t * vm) { clib_error_t * error; diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c index 47e22f2392e..11e6678ed2b 100644 --- a/vnet/vnet/ip/ip4_source_check.c +++ b/vnet/vnet/ip/ip4_source_check.c @@ -297,6 +297,9 @@ VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = { .format_trace = format_ip4_source_check_trace, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_any, + ip4_source_check_reachable_via_any) + VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = { .function = ip4_source_check_reachable_via_rx, .name = "ip4-source-check-via-rx", @@ -311,6 +314,9 @@ VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = { .format_trace = format_ip4_source_check_trace, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_rx, + ip4_source_check_reachable_via_rx) + static clib_error_t * set_ip_source_check (vlib_main_t * 
vm, unformat_input_t * input, diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index a136da3e142..7093c571e1f 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -1260,6 +1260,8 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = { .next_nodes = IP6_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup) + static uword ip6_indirect (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1278,6 +1280,8 @@ VLIB_REGISTER_NODE (ip6_indirect_node) = { .next_nodes = IP6_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_indirect_node, ip6_indirect) + typedef struct { /* Adjacency taken. */ u32 adj_index; @@ -1454,6 +1458,8 @@ VLIB_REGISTER_NODE (ip6_drop_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop) + VLIB_REGISTER_NODE (ip6_punt_node,static) = { .function = ip6_punt, .name = "ip6-punt", @@ -1467,6 +1473,8 @@ VLIB_REGISTER_NODE (ip6_punt_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt) + VLIB_REGISTER_NODE (ip6_miss_node,static) = { .function = ip6_miss, .name = "ip6-miss", @@ -1480,6 +1488,8 @@ VLIB_REGISTER_NODE (ip6_miss_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_miss_node, ip6_miss) + VLIB_REGISTER_NODE (ip6_multicast_node,static) = { .function = ip6_drop, .name = "ip6-multicast", @@ -1858,6 +1868,8 @@ VLIB_REGISTER_NODE (ip6_local_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local) + void ip6_register_protocol (u32 protocol, u32 node_index) { vlib_main_t * vm = vlib_get_main(); @@ -2424,6 +2436,8 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit) + VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = { .function = ip6_rewrite_local, .name = "ip6-rewrite-local", @@ -2439,6 +2453,8 @@ VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local) + /* Global IP6 
main. */ ip6_main_t ip6_main; diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c index 16650dde9b2..045dbc7f603 100644 --- a/vnet/vnet/ip/ip6_hop_by_hop.c +++ b/vnet/vnet/ip/ip6_hop_by_hop.c @@ -628,6 +628,8 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop_node_fn) + /* The main h-b-h tracer will be invoked, no need to do much here */ typedef struct { u32 next_index; @@ -849,6 +851,7 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node, ip6_add_hop_by_hop_node_fn) /* The main h-b-h tracer was already invoked, no need to do much here */ typedef struct { @@ -1162,6 +1165,8 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node, + ip6_pop_hop_by_hop_node_fn) static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) diff --git a/vnet/vnet/ip/ip6_input.c b/vnet/vnet/ip/ip6_input.c index f96a1cfb5a5..2042cbd7813 100644 --- a/vnet/vnet/ip/ip6_input.c +++ b/vnet/vnet/ip/ip6_input.c @@ -305,6 +305,8 @@ VLIB_REGISTER_NODE (ip6_input_node) = { .format_trace = format_ip6_input_trace, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_input_node, ip6_input) + static clib_error_t * ip6_init (vlib_main_t * vm) { ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, diff --git a/vnet/vnet/ip/ip_input_acl.c b/vnet/vnet/ip/ip_input_acl.c index e905ed154a3..fcf8eeabc7b 100644 --- a/vnet/vnet/ip/ip_input_acl.c +++ b/vnet/vnet/ip/ip_input_acl.c @@ -369,6 +369,8 @@ VLIB_REGISTER_NODE (ip4_inacl_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_inacl_node, ip4_inacl) + static uword ip6_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -392,6 +394,8 @@ VLIB_REGISTER_NODE (ip6_inacl_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_inacl_node, ip6_inacl) + static clib_error_t * ip_inacl_init (vlib_main_t * vm) { diff --git a/vnet/vnet/ip/udp_local.c b/vnet/vnet/ip/udp_local.c index 
253ad0f369e..354dd4e8c91 100644 --- a/vnet/vnet/ip/udp_local.c +++ b/vnet/vnet/ip/udp_local.c @@ -327,6 +327,8 @@ VLIB_REGISTER_NODE (udp4_input_node) = { .unformat_buffer = unformat_udp_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input) + VLIB_REGISTER_NODE (udp6_input_node) = { .function = udp6_input, .name = "ip6-udp-lookup", @@ -350,6 +352,8 @@ VLIB_REGISTER_NODE (udp6_input_node) = { .unformat_buffer = unformat_udp_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input) + static void add_dst_port (udp_main_t * um, udp_dst_port_t dst_port, char * dst_port_name, u8 is_ip4) diff --git a/vnet/vnet/ipsec/esp_decrypt.c b/vnet/vnet/ipsec/esp_decrypt.c index 958a4d6738a..7af88b23dd7 100644 --- a/vnet/vnet/ipsec/esp_decrypt.c +++ b/vnet/vnet/ipsec/esp_decrypt.c @@ -425,3 +425,5 @@ VLIB_REGISTER_NODE (esp_decrypt_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (esp_decrypt_node, esp_decrypt_node_fn) + diff --git a/vnet/vnet/ipsec/esp_encrypt.c b/vnet/vnet/ipsec/esp_encrypt.c index 39bbf2e47ff..d1dbcf96f84 100644 --- a/vnet/vnet/ipsec/esp_encrypt.c +++ b/vnet/vnet/ipsec/esp_encrypt.c @@ -387,3 +387,5 @@ VLIB_REGISTER_NODE (esp_encrypt_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (esp_encrypt_node, esp_encrypt_node_fn) + diff --git a/vnet/vnet/ipsec/ipsec_if_in.c b/vnet/vnet/ipsec/ipsec_if_in.c index 517f8bff7b2..5a8a685807b 100644 --- a/vnet/vnet/ipsec/ipsec_if_in.c +++ b/vnet/vnet/ipsec/ipsec_if_in.c @@ -148,4 +148,7 @@ VLIB_REGISTER_NODE (ipsec_if_input_node) = { [IPSEC_IF_INPUT_NEXT_ESP_DECRYPT] = "esp-decrypt", [IPSEC_IF_INPUT_NEXT_DROP] = "error-drop", }, -}; \ No newline at end of file +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ipsec_if_input_node, ipsec_if_input_node_fn) + diff --git a/vnet/vnet/ipsec/ipsec_if_out.c b/vnet/vnet/ipsec/ipsec_if_out.c index 1e1dd52854b..9573c4f6f8c 100644 --- a/vnet/vnet/ipsec/ipsec_if_out.c +++ b/vnet/vnet/ipsec/ipsec_if_out.c @@ -138,3 +138,5 @@ VLIB_REGISTER_NODE (ipsec_if_output_node) = { }, }; 
+VLIB_NODE_FUNCTION_MULTIARCH (ipsec_if_output_node, ipsec_if_output_node_fn) + diff --git a/vnet/vnet/ipsec/ipsec_input.c b/vnet/vnet/ipsec/ipsec_input.c index 3cd60ba1fe9..09acd106cae 100644 --- a/vnet/vnet/ipsec/ipsec_input.c +++ b/vnet/vnet/ipsec/ipsec_input.c @@ -288,6 +288,9 @@ VLIB_REGISTER_NODE (ipsec_input_ip4_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ipsec_input_ip4_node, + ipsec_input_ip4_node_fn) + static vlib_node_registration_t ipsec_input_ip6_node; @@ -406,3 +409,7 @@ VLIB_REGISTER_NODE (ipsec_input_ip6_node,static) = { #undef _ }, }; + +VLIB_NODE_FUNCTION_MULTIARCH (ipsec_input_ip6_node, + ipsec_input_ip6_node_fn) + diff --git a/vnet/vnet/ipsec/ipsec_output.c b/vnet/vnet/ipsec/ipsec_output.c index 509b1e2fba9..93554681a70 100644 --- a/vnet/vnet/ipsec/ipsec_output.c +++ b/vnet/vnet/ipsec/ipsec_output.c @@ -405,6 +405,8 @@ VLIB_REGISTER_NODE (ipsec_output_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ipsec_output_node, ipsec_output_node_fn) + #else /* IPSEC > 1 */ /* Dummy ipsec output node, in case when IPSec is disabled */ diff --git a/vnet/vnet/l2/l2_classify.c b/vnet/vnet/l2/l2_classify.c index 449dea7221d..765bc686c05 100644 --- a/vnet/vnet/l2/l2_classify.c +++ b/vnet/vnet/l2/l2_classify.c @@ -399,6 +399,8 @@ VLIB_REGISTER_NODE (l2_classify_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_classify_node, l2_classify_node_fn) + clib_error_t *l2_classify_init (vlib_main_t *vm) { l2_classify_main_t * cm = &l2_classify_main; diff --git a/vnet/vnet/l2/l2_efp_filter.c b/vnet/vnet/l2/l2_efp_filter.c index b865c375966..17b7eb3fcd6 100644 --- a/vnet/vnet/l2/l2_efp_filter.c +++ b/vnet/vnet/l2/l2_efp_filter.c @@ -498,6 +498,8 @@ VLIB_REGISTER_NODE (l2_efp_filter_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_efp_filter_node, l2_efp_filter_node_fn) + clib_error_t *l2_efp_filter_init (vlib_main_t *vm) { l2_efp_filter_main_t * mp = &l2_efp_filter_main; diff --git a/vnet/vnet/l2/l2_flood.c b/vnet/vnet/l2/l2_flood.c index 
ca8c171c245..9f71677c16b 100644 --- a/vnet/vnet/l2/l2_flood.c +++ b/vnet/vnet/l2/l2_flood.c @@ -442,6 +442,8 @@ VLIB_REGISTER_NODE (l2flood_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2flood_node, l2flood_node_fn) + clib_error_t *l2flood_init (vlib_main_t *vm) { l2flood_main_t * mp = &l2flood_main; diff --git a/vnet/vnet/l2/l2_fwd.c b/vnet/vnet/l2/l2_fwd.c index 88a6b69c340..5af83a7529c 100644 --- a/vnet/vnet/l2/l2_fwd.c +++ b/vnet/vnet/l2/l2_fwd.c @@ -367,6 +367,8 @@ VLIB_REGISTER_NODE (l2fwd_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2fwd_node, l2fwd_node_fn) + clib_error_t *l2fwd_init (vlib_main_t *vm) { l2fwd_main_t * mp = &l2fwd_main; diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c index 870aff68794..dd3a8b756e4 100644 --- a/vnet/vnet/l2/l2_input.c +++ b/vnet/vnet/l2/l2_input.c @@ -433,6 +433,8 @@ VLIB_REGISTER_NODE (l2input_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2input_node, l2input_node_fn) + clib_error_t *l2input_init (vlib_main_t *vm) { l2input_main_t * mp = &l2input_main; diff --git a/vnet/vnet/l2/l2_input_acl.c b/vnet/vnet/l2/l2_input_acl.c index 49df0f65ce4..68c005f11de 100644 --- a/vnet/vnet/l2/l2_input_acl.c +++ b/vnet/vnet/l2/l2_input_acl.c @@ -371,6 +371,8 @@ VLIB_REGISTER_NODE (l2_inacl_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_inacl_node, l2_inacl_node_fn) + clib_error_t *l2_inacl_init (vlib_main_t *vm) { l2_inacl_main_t * mp = &l2_inacl_main; diff --git a/vnet/vnet/l2/l2_input_vtr.c b/vnet/vnet/l2/l2_input_vtr.c index bbef33a9fec..d0f2181be8c 100644 --- a/vnet/vnet/l2/l2_input_vtr.c +++ b/vnet/vnet/l2/l2_input_vtr.c @@ -293,6 +293,8 @@ VLIB_REGISTER_NODE (l2_invtr_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_invtr_node, l2_invtr_node_fn) + clib_error_t *l2_invtr_init (vlib_main_t *vm) { l2_invtr_main_t * mp = &l2_invtr_main; diff --git a/vnet/vnet/l2/l2_learn.c b/vnet/vnet/l2/l2_learn.c index a891cb3c903..0d94e43dc01 100644 --- a/vnet/vnet/l2/l2_learn.c +++ 
b/vnet/vnet/l2/l2_learn.c @@ -415,6 +415,7 @@ VLIB_REGISTER_NODE (l2learn_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn) clib_error_t *l2learn_init (vlib_main_t *vm) { diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c index a550d0757fd..f5b22117dae 100644 --- a/vnet/vnet/l2/l2_output.c +++ b/vnet/vnet/l2/l2_output.c @@ -414,6 +414,8 @@ VLIB_REGISTER_NODE (l2output_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn) + clib_error_t *l2output_init (vlib_main_t *vm) { l2output_main_t * mp = &l2output_main; diff --git a/vnet/vnet/l2/l2_output_acl.c b/vnet/vnet/l2/l2_output_acl.c index f75345f1dbd..b222255f47e 100644 --- a/vnet/vnet/l2/l2_output_acl.c +++ b/vnet/vnet/l2/l2_output_acl.c @@ -274,6 +274,8 @@ VLIB_REGISTER_NODE (l2_outacl_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_outacl_node, l2_outacl_node_fn) + clib_error_t *l2_outacl_init (vlib_main_t *vm) { l2_outacl_main_t * mp = &l2_outacl_main; diff --git a/vnet/vnet/l2/l2_patch.c b/vnet/vnet/l2/l2_patch.c index 63be409d3b8..0839142b8ad 100644 --- a/vnet/vnet/l2/l2_patch.c +++ b/vnet/vnet/l2/l2_patch.c @@ -252,6 +252,8 @@ VLIB_REGISTER_NODE (l2_patch_node, static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_patch_node, l2_patch_node_fn) + int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add) { l2_patch_main_t * l2pm = &l2_patch_main; diff --git a/vnet/vnet/l2/l2_rw.c b/vnet/vnet/l2/l2_rw.c index 93580c24457..b66a7d345c8 100644 --- a/vnet/vnet/l2/l2_rw.c +++ b/vnet/vnet/l2/l2_rw.c @@ -604,3 +604,5 @@ VLIB_REGISTER_NODE (l2_rw_node) = { .next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"}, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2_rw_node, l2_rw_node_fn) + diff --git a/vnet/vnet/l2/l2_xcrw.c b/vnet/vnet/l2/l2_xcrw.c index f5fe3ca14e4..00542a19f80 100644 --- a/vnet/vnet/l2/l2_xcrw.c +++ b/vnet/vnet/l2/l2_xcrw.c @@ -255,6 +255,8 @@ VLIB_REGISTER_NODE (l2_xcrw_node, static) = { }, }; 
+VLIB_NODE_FUNCTION_MULTIARCH (l2_xcrw_node, l2_xcrw_node_fn) + clib_error_t *l2_xcrw_init (vlib_main_t *vm) { l2_xcrw_main_t * mp = &l2_xcrw_main; diff --git a/vnet/vnet/l2tp/decap.c b/vnet/vnet/l2tp/decap.c index 1a2bc4890d0..5f0d05c097f 100644 --- a/vnet/vnet/l2tp/decap.c +++ b/vnet/vnet/l2tp/decap.c @@ -247,6 +247,8 @@ VLIB_REGISTER_NODE (l2t_decap_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2t_decap_node, l2t_decap_node_fn) + void l2tp_decap_init (void) { ip6_register_protocol (IP_PROTOCOL_L2TP, l2t_decap_node.index); diff --git a/vnet/vnet/l2tp/encap.c b/vnet/vnet/l2tp/encap.c index 8f26ab007dc..eca098cb07b 100644 --- a/vnet/vnet/l2tp/encap.c +++ b/vnet/vnet/l2tp/encap.c @@ -206,6 +206,8 @@ VLIB_REGISTER_NODE (l2t_encap_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2t_encap_node, l2t_encap_node_fn) + void l2tp_encap_init (vlib_main_t * vm) { l2tp_encap_runtime_t * rt; diff --git a/vnet/vnet/lawful-intercept/node.c b/vnet/vnet/lawful-intercept/node.c index 26514cc5c14..8701c323358 100644 --- a/vnet/vnet/lawful-intercept/node.c +++ b/vnet/vnet/lawful-intercept/node.c @@ -272,6 +272,8 @@ VLIB_REGISTER_NODE (li_hit_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (li_hit_node, li_hit_node_fn) + #else #include diff --git a/vnet/vnet/mpls-gre/interface.c b/vnet/vnet/mpls-gre/interface.c index 408ca750b00..75cd022a305 100644 --- a/vnet/vnet/mpls-gre/interface.c +++ b/vnet/vnet/mpls-gre/interface.c @@ -259,6 +259,9 @@ VNET_DEVICE_CLASS (mpls_gre_device_class) = { #endif }; +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (mpls_gre_device_class, + mpls_gre_interface_tx) + VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = { .name = "MPLS-GRE", .format_header = format_mpls_gre_header_with_length, @@ -510,6 +513,8 @@ VNET_DEVICE_CLASS (mpls_eth_device_class) = { #endif }; +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (mpls_eth_device_class, + mpls_eth_interface_tx) VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = { .name = "MPLS-ETH", @@ -676,6 +681,8 @@ VLIB_REGISTER_NODE 
(mpls_post_rewrite_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_post_rewrite_node, mpls_post_rewrite) + static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) { ip4_header_t * ip0; diff --git a/vnet/vnet/mpls-gre/node.c b/vnet/vnet/mpls-gre/node.c index 6bf5f814aec..474e2e2a9a4 100644 --- a/vnet/vnet/mpls-gre/node.c +++ b/vnet/vnet/mpls-gre/node.c @@ -278,6 +278,8 @@ VLIB_REGISTER_NODE (mpls_input_node) = { .unformat_buffer = unformat_mpls_gre_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input) + static uword mpls_ethernet_input (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -310,6 +312,8 @@ VLIB_REGISTER_NODE (mpls_ethernet_input_node) = { .unformat_buffer = unformat_mpls_gre_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_ethernet_input_node, mpls_ethernet_input) + static void mpls_setup_nodes (vlib_main_t * vm) { diff --git a/vnet/vnet/mpls-gre/policy_encap.c b/vnet/vnet/mpls-gre/policy_encap.c index b44391934e2..e3f4beb67ad 100644 --- a/vnet/vnet/mpls-gre/policy_encap.c +++ b/vnet/vnet/mpls-gre/policy_encap.c @@ -153,6 +153,8 @@ VLIB_REGISTER_NODE (mpls_policy_encap_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_policy_encap_node, mpls_policy_encap) + static clib_error_t * mpls_policy_encap_init (vlib_main_t * vm) { diff --git a/vnet/vnet/policer/node_funcs.c b/vnet/vnet/policer/node_funcs.c index 8a76e8697ff..739ce4551b6 100644 --- a/vnet/vnet/policer/node_funcs.c +++ b/vnet/vnet/policer/node_funcs.c @@ -324,6 +324,9 @@ VLIB_REGISTER_NODE (policer_by_sw_if_index_node, static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (policer_by_sw_if_index_node, + vnet_policer_by_sw_if_index) + int test_policer_add_del (u32 rx_sw_if_index, u8 *config_name, int is_add) diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c index 46318a27bbe..e0ef3181a5e 100644 --- a/vnet/vnet/sr/sr.c +++ b/vnet/vnet/sr/sr.c @@ -653,6 +653,8 @@ VLIB_REGISTER_NODE (sr_rewrite_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) +
static int ip6_delete_route_no_next_hop (ip6_address_t *dst_address_arg, u32 dst_address_length, u32 rx_table_id) @@ -1911,6 +1913,8 @@ VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) + static clib_error_t * sr_init (vlib_main_t * vm) { ip6_sr_main_t * sm = &sr_main; @@ -2560,6 +2564,8 @@ VLIB_REGISTER_NODE (sr_local_node, static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (sr_local_node, sr_local) + ip6_sr_main_t * sr_get_main (vlib_main_t * vm) { vlib_call_init_function (vm, sr_init); diff --git a/vnet/vnet/sr/sr_replicate.c b/vnet/vnet/sr/sr_replicate.c index 80d04ba6d75..ccc01465f73 100644 --- a/vnet/vnet/sr/sr_replicate.c +++ b/vnet/vnet/sr/sr_replicate.c @@ -350,6 +350,8 @@ VLIB_REGISTER_NODE (sr_replicate_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn) + clib_error_t *sr_replicate_init (vlib_main_t *vm) { sr_replicate_main_t *msm = &sr_replicate_main; diff --git a/vnet/vnet/vxlan/decap.c b/vnet/vnet/vxlan/decap.c index bd61f4bb256..00acd660f35 100644 --- a/vnet/vnet/vxlan/decap.c +++ b/vnet/vnet/vxlan/decap.c @@ -550,6 +550,8 @@ VLIB_REGISTER_NODE (vxlan4_input_node) = { // $$$$ .unformat_buffer = unformat_vxlan_header, }; +VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_input_node, vxlan4_input) + VLIB_REGISTER_NODE (vxlan6_input_node) = { .function = vxlan6_input, .name = "vxlan6-input", @@ -570,3 +572,6 @@ VLIB_REGISTER_NODE (vxlan6_input_node) = { .format_trace = format_vxlan_rx_trace, // $$$$ .unformat_buffer = unformat_vxlan_header, }; + +VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_input_node, vxlan6_input) + diff --git a/vnet/vnet/vxlan/encap.c b/vnet/vnet/vxlan/encap.c index ee5c7d5ea67..387a728a9c3 100644 --- a/vnet/vnet/vxlan/encap.c +++ b/vnet/vnet/vxlan/encap.c @@ -586,3 +586,6 @@ VLIB_REGISTER_NODE (vxlan_encap_node) = { [VXLAN_ENCAP_NEXT_DROP] = "error-drop", }, }; + +VLIB_NODE_FUNCTION_MULTIARCH (vxlan_encap_node, vxlan_encap) + diff --git 
a/vpp/app/l2t_ip6.c b/vpp/app/l2t_ip6.c index ac24886ad42..8a1e23a971b 100644 --- a/vpp/app/l2t_ip6.c +++ b/vpp/app/l2t_ip6.c @@ -274,6 +274,8 @@ static VLIB_REGISTER_NODE (sw6_ip6_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (sw6_ip6_node, ip6_l2t_node_fn) + static clib_error_t * l2tp_config (vlib_main_t * vm, unformat_input_t * input) { diff --git a/vpp/app/l2t_l2.c b/vpp/app/l2t_l2.c index 5c8327d5262..f6385a5ab1d 100644 --- a/vpp/app/l2t_l2.c +++ b/vpp/app/l2t_l2.c @@ -250,3 +250,5 @@ VLIB_REGISTER_NODE (l2t_l2_node) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (l2t_l2_node, l2t_l2_node_fn) + diff --git a/vpp/app/version.c b/vpp/app/version.c index 588aadf29e9..22bec400c09 100644 --- a/vpp/app/version.c +++ b/vpp/app/version.c @@ -59,6 +59,7 @@ show_vpe_version_command_fn (vlib_main_t * vm, _("Compiler", "%s", vpe_compiler); _("CPU model name", "%U", format_cpu_model_name); _("CPU microarchitecture", "%U", format_cpu_uarch); + _("CPU flags", "%U", format_cpu_flags); _("Current PID", "%d", getpid()); #if DPDK > 0 _("DPDK Version", "%s", rte_version()); diff --git a/vpp/vnet/main.c b/vpp/vnet/main.c index f707994ba54..2540083982b 100644 --- a/vpp/vnet/main.c +++ b/vpp/vnet/main.c @@ -12,6 +12,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +#include #include #include #include @@ -59,33 +61,32 @@ int main (int argc, char * argv[]) u32 size; void vlib_set_get_handoff_structure_cb (void *cb); -#if __x86_64__ && !defined(__clang__) - __builtin_cpu_init (); +#if __x86_64__ const char * msg = "ERROR: This binary requires CPU with %s extensions.\n"; #define _(a,b) \ - if (!__builtin_cpu_supports(a)) \ + if (!clib_cpu_supports_ ## a ()) \ { \ fprintf(stderr, msg, b); \ exit(1); \ } #if __AVX2__ - _("avx2", "AVX2") + _(avx2, "AVX2") #endif #if __AVX__ - _("avx", "AVX") + _(avx, "AVX") #endif #if __SSE4_2__ - _("sse4.2", "SSE4.2") + _(sse42, "SSE4.2") #endif #if __SSE4_1__ - _("sse4.1", "SSE4.1") + _(sse41, "SSE4.1") #endif #if __SSSE3__ - _("ssse3", "SSSE3") + _(ssse3, "SSSE3") #endif #if __SSE3__ - _("sse3", "SSE3") + _(sse3, "SSE3") #endif #undef _ #endif diff --git a/vppinfra/vppinfra/cpu.c b/vppinfra/vppinfra/cpu.c index f2dbaf1f7d8..9008ee3dd89 100644 --- a/vppinfra/vppinfra/cpu.c +++ b/vppinfra/vppinfra/cpu.c @@ -16,10 +16,6 @@ #include #include -#if __x86_64__ -#include -#endif - #define foreach_x86_cpu_uarch \ _(0x06, 0x4f, "Broadwell", "Broadwell-EP/EX") \ _(0x06, 0x3d, "Broadwell", "Broadwell") \ @@ -108,4 +104,21 @@ format_cpu_model_name (u8 * s, va_list * args) #else /* ! __x86_64__ */ return format (s, "unknown"); #endif -} \ No newline at end of file +} + +u8 * +format_cpu_flags (u8 * s, va_list * args) +{ +#if __x86_64__ +#define _(flag, func, reg, bit) \ + if (clib_cpu_supports_ ## flag()) \ + s = format (s, #flag " "); + foreach_x86_64_flags + return s; +#undef _ +#else /* ! __x86_64__ */ + return format (s, "unknown"); +#endif +} + + diff --git a/vppinfra/vppinfra/cpu.h b/vppinfra/vppinfra/cpu.h index 79cdf74f84b..961af709a63 100644 --- a/vppinfra/vppinfra/cpu.h +++ b/vppinfra/vppinfra/cpu.h @@ -16,7 +16,83 @@ #ifndef included_clib_cpu_h #define included_clib_cpu_h +#include + +/* + * multiarchitecture support. 
Adding a new entry will produce a
+ * new graph node function variant optimized for a specific CPU
+ * microarchitecture.
+ * Order is important for runtime selection, as the first match wins.
+ */
+
+#if __x86_64__ && CLIB_DEBUG == 0
+#define foreach_march_variant(macro, x) \
+  macro(avx2, x, "arch=core-avx2")
+#else
+#define foreach_march_variant(macro, x)
+#endif
+
+/* Enable tree vectorization for tagged functions on GCC 5+; expands to nothing otherwise. */
+#if __GNUC__ > 4 && !__clang__
+#define CLIB_CPU_OPTIMIZED __attribute__ ((optimize ("tree-vectorize")))
+#else
+#define CLIB_CPU_OPTIMIZED
+#endif
+
+/* fn_multiarch_select() returns the first CPU-supported fn_<arch> clone, else fn itself. */
+#define CLIB_MULTIARCH_ARCH_CHECK(arch, fn, tgt) \
+  if (clib_cpu_supports_ ## arch()) \
+    return & fn ## _ ##arch;
+
+#define CLIB_MULTIARCH_SELECT_FN(fn,...) \
+  __VA_ARGS__ void * fn ## _multiarch_select(void) \
+{ \
+  foreach_march_variant(CLIB_MULTIARCH_ARCH_CHECK, fn) \
+  return & fn; \
+}
+
+#if __x86_64__
+#include <cpuid.h>
+/* Feature table entries: _(flag, CPUID leaf, result register, bit index) */
+#define foreach_x86_64_flags \
+_ (sse3, 1, ecx, 0) \
+_ (ssse3, 1, ecx, 9) \
+_ (sse41, 1, ecx, 19) \
+_ (sse42, 1, ecx, 20) \
+_ (avx, 1, ecx, 28) \
+_ (avx2, 7, ebx, 5) \
+_ (avx512f, 7, ebx, 16) \
+_ (aes, 1, ecx, 25) \
+_ (sha, 7, ebx, 29)
+/* Execute CPUID leaf 'lev' (subleaf 0 for leaf 7); returns 1, or 0 if the leaf is unsupported. */
+static inline int
+clib_get_cpuid(const u32 lev, u32 * eax, u32 *ebx, u32 * ecx, u32 * edx)
+{
+  if ((u32) __get_cpuid_max (0x80000000 & lev, 0) < lev)
+    return 0;
+  if (lev == 7)
+    __cpuid_count(lev, 0, *eax, *ebx, *ecx, *edx);
+  else
+    __cpuid(lev, *eax, *ebx, *ecx, *edx);
+  return 1;
+}
+
+/* Generate clib_cpu_supports_<flag>() for every entry in the flag table. */
+#define _(flag, func, reg, bit) \
+static inline int \
+clib_cpu_supports_ ## flag(void) \
+{ \
+  u32 __attribute__((unused)) eax, ebx = 0, ecx = 0, edx = 0; \
+  if (!clib_get_cpuid (func, &eax, &ebx, &ecx, &edx)) \
+    return 0; \
+  return ((reg & (1U << bit)) != 0); \
+}
+  foreach_x86_64_flags
+#undef _
+#endif
+
 format_function_t format_cpu_uarch;
 format_function_t format_cpu_model_name;
+format_function_t format_cpu_flags;
 
-#endif
\ No newline at end of file
+#endif