X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fethernet%2Fnode.c;h=6e1e1e3f3c8fd20a398369198a1e09179f19f291;hb=e9cebdf4c12adf7962ab2a2053789c93d70c33d3;hp=5305012f26d2921fe929d0e7fe26371a2540424a;hpb=0f26c5a0138ac86d7ebd197c31a09d8d624c35fe;p=vpp.git diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 5305012f26d..6e1e1e3f3c8 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Cisco and/or its affiliates. + * Copyright (c) 2018 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -47,7 +49,9 @@ #define foreach_ethernet_input_next \ _ (PUNT, "error-punt") \ _ (DROP, "error-drop") \ - _ (LLC, "llc-input") + _ (LLC, "llc-input") \ + _ (IP4_INPUT, "ip4-input") \ + _ (IP4_INPUT_NCS, "ip4-input-no-checksum") typedef enum { @@ -60,6 +64,8 @@ typedef enum typedef struct { u8 packet_data[32]; + u16 frame_flags; + ethernet_input_frame_t frame_data; } ethernet_input_trace_t; static u8 * @@ -68,13 +74,22 @@ format_ethernet_input_trace (u8 * s, va_list * va) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *); + u32 indent = format_get_indent (s); + if (t->frame_flags) + { + s = format (s, "frame: flags 0x%x", t->frame_flags); + if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) + s = format (s, ", hw-if-index %u, sw-if-index %u", + t->frame_data.hw_if_index, t->frame_data.sw_if_index); + s = format (s, "\n%U", format_white_space, indent); + } s = format (s, "%U", format_ethernet_header, t->packet_data); return s; } -vlib_node_registration_t ethernet_input_node; +extern vlib_node_registration_t ethernet_input_node; typedef enum { @@ -101,7 +116,8 @@ parse_header (ethernet_input_variant_t variant, e0 = (void *) (b0->data + b0->current_data); - vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID; vlib_buffer_advance (b0, sizeof (e0[0])); @@ -142,6 +158,8 @@ parse_header (ethernet_input_variant_t variant, tag = clib_net_to_host_u16 (h0->priority_cfi_and_id); *outer_id = tag & 0xfff; + if (0 == *outer_id) + *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG; *type = clib_net_to_host_u16 (h0->type); @@ -163,11 +181,12 @@ parse_header (ethernet_input_variant_t variant, vlib_buffer_advance (b0, sizeof (h0[0])); vlan_count = 2; - if (*type == ETHERNET_TYPE_VLAN) { // More than double tagged packet *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG; + + vlib_buffer_advance (b0, sizeof (h0[0])); vlan_count = 3; // "unknown" number, aka, 3-or-more } } @@ -202,9 +221,7 @@ identify_subint (vnet_hw_interface_t * hi, if (!(*is_l2)) { ethernet_header_t *e0; - e0 = - (void *) (b0->data + - vnet_buffer (b0)->ethernet.start_of_ethernet_header); + e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset); if (!(ethernet_address_cast (e0->dst_address))) { @@ -226,6 +243,9 @@ determine_next_node (ethernet_main_t * em, u32 is_l20, u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0) { + vnet_buffer (b0)->l3_hdr_offset = b0->current_data; + b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE)) { // some error occurred @@ -233,11 +253,13 @@ determine_next_node (ethernet_main_t * em, } else if (is_l20) { - *next0 = em->l2_next; // record the L2 len and reset the buffer so the L2 header is preserved - u32 eth_start = vnet_buffer (b0)->ethernet.start_of_ethernet_header; + u32 eth_start = vnet_buffer (b0)->l2_hdr_offset; vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start; - vlib_buffer_advance (b0, -ethernet_buffer_header_size (b0)); + *next0 = em->l2_next; + ASSERT (vnet_buffer (b0)->l2.l2_len == + ethernet_buffer_header_size (b0)); + vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len)); // check for common IP/MPLS ethertypes } @@ -280,18 +302,418 @@ determine_next_node (ethernet_main_t * em, } } -static_always_inline uword +typedef enum +{ + ETYPE_ID_UNKNOWN = 0, + ETYPE_ID_IP4, + ETYPE_ID_IP6, + ETYPE_ID_MPLS, + ETYPE_N_IDS, +} etype_id_t; + +static_always_inline void +eth_input_advance_and_flags (vlib_main_t * vm, u32 * from, u32 n_left, + i16 advance, u32 and_flags, u32 or_flags) +{ + vlib_buffer_t *b[8]; + while (n_left >= 8) + { + vlib_get_buffers (vm, from, b, 8); + vlib_buffer_advance (b[0], advance); + vlib_buffer_advance (b[1], advance); + vlib_buffer_advance (b[2], advance); + vlib_buffer_advance (b[3], advance); + vlib_buffer_advance (b[4], advance); + vlib_buffer_advance (b[5], advance); + vlib_buffer_advance (b[6], advance); + vlib_buffer_advance (b[7], advance); + b[0]->flags = (b[0]->flags & and_flags) | or_flags; + b[1]->flags = (b[1]->flags & and_flags) | or_flags; + b[2]->flags = (b[2]->flags & and_flags) | or_flags; + b[3]->flags = (b[3]->flags & and_flags) | or_flags; + b[4]->flags = (b[4]->flags & and_flags) | or_flags; + b[5]->flags = (b[5]->flags & and_flags) | or_flags; + b[6]->flags = (b[6]->flags & and_flags) | or_flags; + b[7]->flags = (b[7]->flags & and_flags) | or_flags; + + n_left -= 8; + from += 8; + } + while (n_left) + { + vlib_get_buffers (vm, from, b, 1); + vlib_buffer_advance (b[0], advance); + b[0]->flags = (b[0]->flags & and_flags) | or_flags; + + n_left -= 1; + from += 1; + } +} + +typedef struct +{ + u16 etypes[VLIB_FRAME_SIZE]; + u32 bufs_by_etype[ETYPE_N_IDS][VLIB_FRAME_SIZE]; + u16 n_bufs_by_etype[ETYPE_N_IDS]; +} eth_input_data_t; + +/* following vector code relies on following assumptions */ +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0); +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2); +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4); +STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2, + "l3_hdr_offset must follow l2_hdr_offset"); + +static_always_inline void +eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, i16 adv, u32 flags, int is_l3) +{ +#ifdef CLIB_HAVE_VEC256 + /* to reduce number of small loads/stores we are loading first 64 bits + of each buffer metadata into 256-bit register so we can advance + current_data, current_length and flags. + Observed saving of this code is ~2 clocks per packet */ + u64x4 r, radv; + + /* vector if signed 16 bit integers used in signed vector add operation + to advnce current_data and current_length */ + u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags }; + i16x16 adv4 = { + adv, -adv, 0, 0, adv, -adv, 0, 0, + adv, -adv, 0, 0, adv, -adv, 0, 0 + }; + + /* load 4 x 64 bits */ + r = u64x4_gather (b[0], b[1], b[2], b[3]); + + /* set flags */ + r |= (u64x4) flags4; + + /* advance buffer */ + radv = (u64x4) ((i16x16) r + adv4); + + /* write 4 x 64 bits */ + u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]); + + /* use old current_data as l2_hdr_offset and new current_data as + l3_hdr_offset */ + r = (u64x4) u16x16_blend (r, radv << 16, 0xaa); + + /* store both l2_hdr_offset and l3_hdr_offset in single store operation */ + u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset); + + if (is_l3) + { + ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset); + ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset); + ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset); + ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset); + + ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv); + ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv); + ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv); + ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv); + } + else + { + ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset); + ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset); + ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset); + ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset); + + ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv); + ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv); + ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv); + ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv); + } + +#else + vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data; + vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data; + vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data; + vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data; + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv; + vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv; + vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv; + vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv; + + if (is_l3) + { + vlib_buffer_advance (b[0], adv); + vlib_buffer_advance (b[1], adv); + vlib_buffer_advance (b[2], adv); + vlib_buffer_advance (b[3], adv); + } + + b[0]->flags |= flags; + b[1]->flags |= flags; + b[2]->flags |= flags; + b[3]->flags |= flags; +#endif + + if (!is_l3) + { + vnet_buffer (b[0])->l2.l2_len = adv; + vnet_buffer (b[1])->l2.l2_len = adv; + vnet_buffer (b[2])->l2.l2_len = adv; + vnet_buffer (b[3])->l2.l2_len = adv; + } +} + +static_always_inline void +eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, i16 adv, u32 flags, int is_l3) +{ + vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data; + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv; + + if (is_l3) + vlib_buffer_advance (b[0], adv); + b[0]->flags |= flags; + if (!is_l3) + vnet_buffer (b[0])->l2.l2_len = adv; +} + +static_always_inline void +eth_input_process_frame (vlib_main_t * vm, u32 * from, u16 * etype, + u32 n_left, int is_l3) +{ + vlib_buffer_t *b[16]; + ethernet_header_t *e; + int adv = sizeof (ethernet_header_t); + + u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + + while (n_left >= 16) + { + vlib_buffer_t **ph = b + 12, **pd = b + 8; + vlib_get_buffers (vm, from, b, 4); + vlib_get_buffers (vm, from + 8, b + 8, 8); + + vlib_prefetch_buffer_header (ph[0], LOAD); + vlib_prefetch_buffer_data (pd[0], LOAD); + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + vlib_prefetch_buffer_header (ph[1], LOAD); + vlib_prefetch_buffer_data (pd[1], LOAD); + e = vlib_buffer_get_current (b[1]); + etype[1] = e->type; + + vlib_prefetch_buffer_header (ph[2], LOAD); + vlib_prefetch_buffer_data (pd[2], LOAD); + e = vlib_buffer_get_current (b[2]); + etype[2] = e->type; + + vlib_prefetch_buffer_header (ph[3], LOAD); + vlib_prefetch_buffer_data (pd[3], LOAD); + e = vlib_buffer_get_current (b[3]); + etype[3] = e->type; + + eth_input_adv_and_flags_x4 (b, adv, flags, is_l3); + + /* next */ + n_left -= 4; + etype += 4; + from += 4; + } + while (n_left >= 4) + { + vlib_get_buffers (vm, from, b, 4); + + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + e = vlib_buffer_get_current (b[1]); + etype[1] = e->type; + + e = vlib_buffer_get_current (b[2]); + etype[2] = e->type; + + e = vlib_buffer_get_current (b[3]); + etype[3] = e->type; + + eth_input_adv_and_flags_x4 (b, adv, flags, is_l3); + + /* next */ + n_left -= 4; + etype += 4; + from += 4; + } + while (n_left) + { + vlib_get_buffers (vm, from, b, 1); + + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + eth_input_adv_and_flags_x1 (b, adv, flags, is_l3); + + /* next */ + n_left -= 1; + etype += 1; + from += 1; + } +} + +static_always_inline void +eth_input_sort (vlib_main_t * vm, u32 * from, u32 n_packets, + eth_input_data_t * d) +{ + u16 *etype = d->etypes; + i32 n_left = n_packets; + +#if defined (CLIB_HAVE_VEC256) + u16x16 e16; + u16x16 et16_ip4 = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); + u16x16 et16_ip6 = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); + u16x16 et16_mpls = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); + u16x16 id16_ip4 = u16x16_splat (ETYPE_ID_IP4); + u16x16 id16_ip6 = u16x16_splat (ETYPE_ID_IP6); + u16x16 id16_mpls = u16x16_splat (ETYPE_ID_MPLS); + + while (n_left > 0) + { + u16x16 r = { 0 }; + e16 = u16x16_load_unaligned (etype); + r += (e16 == et16_ip4) & id16_ip4; + r += (e16 == et16_ip6) & id16_ip6; + r += (e16 == et16_mpls) & id16_mpls; + u16x16_store_unaligned (r, etype); + etype += 16; + n_left -= 16; + } +#elif defined (CLIB_HAVE_VEC128) + u16x8 e8; + u16x8 et8_ip4 = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); + u16x8 et8_ip6 = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); + u16x8 et8_mpls = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); + u16x8 id8_ip4 = u16x8_splat (ETYPE_ID_IP4); + u16x8 id8_ip6 = u16x8_splat (ETYPE_ID_IP6); + u16x8 id8_mpls = u16x8_splat (ETYPE_ID_MPLS); + + while (n_left > 0) + { + u16x8 r = { 0 }; + e8 = u16x8_load_unaligned (etype); + r += (e8 == et8_ip4) & id8_ip4; + r += (e8 == et8_ip6) & id8_ip6; + r += (e8 == et8_mpls) & id8_mpls; + u16x8_store_unaligned (r, etype); + etype += 8; + n_left -= 8; + } +#else + while (n_left) + { + if (etype[0] == ETHERNET_TYPE_IP4) + etype[0] = ETYPE_ID_IP4; + else if (etype[0] == ETHERNET_TYPE_IP6) + etype[0] = ETYPE_ID_IP6; + else if (etype[0] == ETHERNET_TYPE_MPLS) + etype[0] = ETYPE_ID_MPLS; + else + etype[0] = ETYPE_ID_UNKNOWN; + + etype += 1; + n_left -= 1; + } +#endif + + etype = d->etypes; + n_left = n_packets; + + clib_memset_u16 (d->n_bufs_by_etype, 0, ETYPE_N_IDS); + while (n_left) + { + u16 x, y; + x = etype[0]; + y = d->n_bufs_by_etype[x]; + +#ifdef CLIB_HAVE_VEC256 + if (n_left >= 16 && u16x16_is_all_equal (u16x16_load_unaligned (etype), + etype[0])) + { + clib_memcpy_fast (&d->bufs_by_etype[x][y], from, 16 * sizeof (u32)); + d->n_bufs_by_etype[x] += 16; + + /* next */ + n_left -= 16; + etype += 16; + from += 16; + continue; + } +#endif +#ifdef CLIB_HAVE_VEC128 + if (n_left >= 8 && u16x8_is_all_equal (u16x8_load_unaligned (etype), + etype[0])) + { + clib_memcpy_fast (&d->bufs_by_etype[x][y], from, 8 * sizeof (u32)); + d->n_bufs_by_etype[x] += 8; + + /* next */ + n_left -= 8; + etype += 8; + from += 8; + continue; + } +#endif + d->bufs_by_etype[x][y] = from[0]; + d->n_bufs_by_etype[x]++; + + /* next */ + n_left -= 1; + etype += 1; + from += 1; + } +} + +static_always_inline void +ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 *from, n_left; + if ((node->flags & VLIB_NODE_FLAG_TRACE) == 0) + return; + + from = vlib_frame_vector_args (from_frame); + n_left = from_frame->n_vectors; + + while (n_left) + { + ethernet_input_trace_t *t0; + vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (ethernet_input_trace_t)); + clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data, + sizeof (t0->packet_data)); + t0->frame_flags = from_frame->flags; + clib_memcpy_fast (&t0->frame_data, + vlib_frame_scalar_args (from_frame), + sizeof (ethernet_input_frame_t)); + } + from += 1; + n_left -= 1; + } +} + +static_always_inline void ethernet_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame, + u32 * from, u32 n_packets, ethernet_input_variant_t variant) { vnet_main_t *vnm = vnet_get_main (); ethernet_main_t *em = ðernet_main; vlib_node_runtime_t *error_node; - u32 n_left_from, next_index, *from, *to_next; + u32 n_left_from, next_index, *to_next; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; u32 cached_sw_if_index = ~0; u32 cached_is_l2 = 0; /* shut up gcc */ vnet_hw_interface_t *hi = NULL; /* used for main interface only */ @@ -301,15 +723,7 @@ ethernet_input_inline (vlib_main_t * vm, else error_node = node; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - vlib_trace_frame_buffers_only (vm, node, - from, - n_left_from, - sizeof (from[0]), - sizeof (ethernet_input_trace_t)); + n_left_from = n_packets; next_index = node->cached_next_index; stats_sw_if_index = node->runtime_data[0]; @@ -370,10 +784,16 @@ ethernet_input_inline (vlib_main_t * vm, e1 = vlib_buffer_get_current (b1); type1 = clib_net_to_host_u16 (e1->type); + /* Set the L2 header offset for all packets */ + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + vnet_buffer (b1)->l2_hdr_offset = b1->current_data; + b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID; + b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID; + /* Speed-path for the untagged case */ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET - && !ethernet_frame_is_tagged (type0) - && !ethernet_frame_is_tagged (type1))) + && !ethernet_frame_is_any_tagged_x2 (type0, + type1))) { main_intf_t *intf0; subint_config_t *subint0; @@ -397,13 +817,16 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; - vnet_buffer (b1)->ethernet.start_of_ethernet_header = - b1->current_data; - if (PREDICT_TRUE (is_l20 != 0)) { + vnet_buffer (b0)->l3_hdr_offset = + vnet_buffer (b0)->l2_hdr_offset + + sizeof (ethernet_header_t); + vnet_buffer (b1)->l3_hdr_offset = + vnet_buffer (b1)->l2_hdr_offset + + sizeof (ethernet_header_t); + b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; next0 = em->l2_next; vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t); next1 = em->l2_next; @@ -412,17 +835,19 @@ ethernet_input_inline (vlib_main_t * vm, else { if (!ethernet_address_cast (e0->dst_address) && + (hi->hw_address != 0) && !eth_mac_equal ((u8 *) e0, hi->hw_address)) error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; if (!ethernet_address_cast (e1->dst_address) && + (hi->hw_address != 0) && !eth_mac_equal ((u8 *) e1, hi->hw_address)) error1 = ETHERNET_ERROR_L3_MAC_MISMATCH; + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); determine_next_node (em, variant, 0, type0, b0, &error0, &next0); - vlib_buffer_advance (b0, sizeof (ethernet_header_t)); + vlib_buffer_advance (b1, sizeof (ethernet_header_t)); determine_next_node (em, variant, 0, type1, b1, &error1, &next1); - vlib_buffer_advance (b1, sizeof (ethernet_header_t)); } goto ship_it01; } @@ -490,9 +915,9 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data - - vnet_buffer (b1)->ethernet.start_of_ethernet_header; + - vnet_buffer (b1)->l2_hdr_offset; stats_n_packets += 2; stats_n_bytes += len0 + len1; @@ -597,6 +1022,10 @@ ethernet_input_inline (vlib_main_t * vm, e0 = vlib_buffer_get_current (b0); type0 = clib_net_to_host_u16 (e0->type); + /* Set the L2 header offset for all packets */ + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID; + /* Speed-path for the untagged case */ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET && !ethernet_frame_is_tagged (type0))) @@ -617,22 +1046,25 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; if (PREDICT_TRUE (is_l20 != 0)) { + vnet_buffer (b0)->l3_hdr_offset = + vnet_buffer (b0)->l2_hdr_offset + + sizeof (ethernet_header_t); + b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; next0 = em->l2_next; vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t); } else { if (!ethernet_address_cast (e0->dst_address) && + (hi->hw_address != 0) && !eth_mac_equal ((u8 *) e0, hi->hw_address)) error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); determine_next_node (em, variant, 0, type0, b0, &error0, &next0); - vlib_buffer_advance (b0, sizeof (ethernet_header_t)); } goto ship_it0; } @@ -680,7 +1112,7 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; stats_n_packets += 1; stats_n_bytes += len0; @@ -737,32 +1169,136 @@ ethernet_input_inline (vlib_main_t * vm, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } +} + +static_always_inline void +eth_input_enqueue_untagged (vlib_main_t * vm, vlib_node_runtime_t * node, + eth_input_data_t * d, int ip4_cksum_ok, int is_l3) +{ + ethernet_main_t *em = ðernet_main; + etype_id_t id; + u32 next_index; + + id = ETYPE_ID_IP4; + if (d->n_bufs_by_etype[id]) + { + if (is_l3) + { + next_index = em->l3_next.input_next_ip4; + if (next_index == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok) + next_index = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS; + } + else + next_index = em->l2_next; + + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } + + id = ETYPE_ID_IP6; + if (d->n_bufs_by_etype[id]) + { + next_index = is_l3 ? em->l3_next.input_next_ip6 : em->l2_next; + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } + + id = ETYPE_ID_MPLS; + if (d->n_bufs_by_etype[id]) + { + next_index = is_l3 ? em->l3_next.input_next_mpls : em->l2_next; + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } - return from_frame->n_vectors; + id = ETYPE_ID_UNKNOWN; + if (d->n_bufs_by_etype[id]) + { + /* in case of l3 interfaces, we already advanced buffer so we need to + roll back */ + if (is_l3) + eth_input_advance_and_flags (vm, d->bufs_by_etype[id], + d->n_bufs_by_etype[id], + -(i16) sizeof (ethernet_header_t), + ~VNET_BUFFER_F_L3_HDR_OFFSET_VALID, 0); + ethernet_input_inline (vm, node, d->bufs_by_etype[id], + d->n_bufs_by_etype[id], + ETHERNET_INPUT_VARIANT_ETHERNET); + } } -static uword -ethernet_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) +VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_ETHERNET); + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + u32 *from = vlib_frame_vector_args (frame); + u32 n_packets = frame->n_vectors; + + ethernet_input_trace (vm, node, frame); + + if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) + { + eth_input_data_t data, *d = &data; + ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index); + main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + subint_config_t *subint0 = &intf0->untagged_subint; + int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0; + + if (subint0->flags & SUBINT_CONFIG_L2) + { + /* untagged packets are treated as L2 */ + eth_input_process_frame (vm, from, d->etypes, n_packets, 0); + eth_input_sort (vm, from, n_packets, d); + eth_input_enqueue_untagged (vm, node, d, ip4_cksum_ok, 0); + } + else + { + ethernet_interface_t *ei; + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + + /* currently only slowpath deals with dmac check */ + if (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + goto slowpath; + + /* untagged packets are treated as L3 */ + eth_input_process_frame (vm, from, d->etypes, n_packets, 1); + eth_input_sort (vm, from, n_packets, d); + eth_input_enqueue_untagged (vm, node, d, ip4_cksum_ok, 1); + } + return n_packets; + } + +slowpath: + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_ETHERNET); + return n_packets; } -static uword -ethernet_input_type (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) +VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); + u32 *from = vlib_frame_vector_args (from_frame); + u32 n_packets = from_frame->n_vectors; + ethernet_input_trace (vm, node, from_frame); + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); + return n_packets; } -static uword -ethernet_input_not_l2 (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) +VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_NOT_L2); + u32 *from = vlib_frame_vector_args (from_frame); + u32 n_packets = from_frame->n_vectors; + ethernet_input_trace (vm, node, from_frame); + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_NOT_L2); + return n_packets; } @@ -798,11 +1334,32 @@ ethernet_sw_interface_get_config (vnet_main_t * vnm, // Locate the subint for the given ethernet config si = vnet_get_sw_interface (vnm, sw_if_index); - if (si->sub.eth.flags.default_sub) + if (si->type == VNET_SW_INTERFACE_TYPE_P2P) + { + p2p_ethernet_main_t *p2pm = &p2p_main; + u32 p2pe_sw_if_index = + p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac); + if (p2pe_sw_if_index == ~0) + { + pool_get (p2pm->p2p_subif_pool, subint); + si->p2p.pool_index = subint - p2pm->p2p_subif_pool; + } + else + subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index); + *flags = SUBINT_CONFIG_P2P; + } + else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE) + { + pipe_t *pipe; + + pipe = pipe_get (sw_if_index); + subint = &pipe->subint; + *flags = SUBINT_CONFIG_P2P; + } + else if (si->sub.eth.flags.default_sub) { subint = &main_intf->default_subint; - *flags = SUBINT_CONFIG_MATCH_0_TAG | - SUBINT_CONFIG_MATCH_1_TAG | + *flags = SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG; } else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0)) @@ -931,7 +1488,7 @@ done: return subint; } -clib_error_t * +static clib_error_t * ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { subint_config_t *subint; @@ -960,6 +1517,7 @@ done: VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down); +#ifndef CLIB_MARCH_VARIANT // Set the L2/L3 mode for the subinterface void ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2) @@ -1047,6 +1605,7 @@ ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm, done: return; } +#endif static clib_error_t * ethernet_sw_interface_add_del (vnet_main_t * vnm, @@ -1087,7 +1646,7 @@ ethernet_sw_interface_add_del (vnet_main_t * vnm, } else { - // Note that config is L3 by defaulty + // Note that config is L3 by default subint->flags = SUBINT_CONFIG_VALID | match_flags; subint->sw_if_index = ~0; // because interfaces are initially down } @@ -1106,10 +1665,10 @@ static char *ethernet_error_strings[] = { /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ethernet_input_node) = { - .function = ethernet_input, .name = "ethernet-input", /* Takes a vector of packets. */ .vector_size = sizeof (u32), + .scalar_size = sizeof (ethernet_input_frame_t), .n_errors = ETHERNET_N_ERROR, .error_strings = ethernet_error_strings, .n_next_nodes = ETHERNET_INPUT_N_NEXT, @@ -1122,15 +1681,8 @@ VLIB_REGISTER_NODE (ethernet_input_node) = { .format_trace = format_ethernet_input_trace, .unformat_buffer = unformat_ethernet_header, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_node, ethernet_input) -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ethernet_input_type_node, static) = { - .function = ethernet_input_type, +VLIB_REGISTER_NODE (ethernet_input_type_node) = { .name = "ethernet-input-type", /* Takes a vector of packets. */ .vector_size = sizeof (u32), @@ -1141,15 +1693,8 @@ VLIB_REGISTER_NODE (ethernet_input_type_node, static) = { #undef _ }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_type_node, ethernet_input_type) -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ethernet_input_not_l2_node, static) = { - .function = ethernet_input_not_l2, +VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = { .name = "ethernet-input-not-l2", /* Takes a vector of packets. */ .vector_size = sizeof (u32), @@ -1162,13 +1707,7 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node, static) = { }; /* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_not_l2_node, - ethernet_input_not_l2) -/* *INDENT-ON* */ - - +#ifndef CLIB_MARCH_VARIANT void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 enable) @@ -1369,6 +1908,7 @@ ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index) ASSERT (i == em->redirect_l3_next); } +#endif /* * fd.io coding-style-patch-verification: ON