2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
48 #define foreach_ethernet_input_next \
49 _ (PUNT, "error-punt") \
50 _ (DROP, "error-drop") \
51 _ (LLC, "llc-input") \
52 _ (IP4_INPUT, "ip4-input") \
53 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
57 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
58 foreach_ethernet_input_next
60 ETHERNET_INPUT_N_NEXT,
61 } ethernet_input_next_t;
67 ethernet_input_frame_t frame_data;
68 } ethernet_input_trace_t;
/* Trace formatter for the ethernet-input node: prints the captured frame
   flags (plus the hw/sw if-index pair when the frame was marked as coming
   from a single sw_if_index) followed by the captured ethernet header. */
71 format_ethernet_input_trace (u8 * s, va_list * va)
73 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
74 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
75 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
76 u32 indent = format_get_indent (s);
80 s = format (s, "frame: flags 0x%x", t->frame_flags);
/* hw/sw if-index are only meaningful when the whole frame shares one
   sw_if_index (ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) */
81 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
82 s = format (s, ", hw-if-index %u, sw-if-index %u",
83 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
/* align the ethernet-header dump under the flags line */
84 s = format (s, "\n%U", format_white_space, indent);
86 s = format (s, "%U", format_ethernet_header, t->packet_data);
91 extern vlib_node_registration_t ethernet_input_node;
95 ETHERNET_INPUT_VARIANT_ETHERNET,
96 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
97 ETHERNET_INPUT_VARIANT_NOT_L2,
98 } ethernet_input_variant_t;
101 // Parse the ethernet header to extract vlan tags and innermost ethertype
//
// Records the L2 header offset in buffer metadata, advances the buffer past
// the ethernet (and any vlan) headers, and returns via out-params:
//   *type        - innermost ethertype (host byte order)
//   *outer_id    - outer vlan id (12 bits), 0 if untagged
//   *inner_id    - inner vlan id (12 bits), 0 if single/untagged
//   *match_flags - SUBINT_CONFIG_MATCH_{0..3}_TAG per number of tags seen
102 static_always_inline void
103 parse_header (ethernet_input_variant_t variant,
107 u16 * outer_id, u16 * inner_id, u32 * match_flags)
111 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
112 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
114 ethernet_header_t *e0;
116 e0 = (void *) (b0->data + b0->current_data);
118 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
119 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
121 vlib_buffer_advance (b0, sizeof (e0[0]));
123 *type = clib_net_to_host_u16 (e0->type);
125 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
127 // here when prior node was LLC/SNAP processing
130 e0 = (void *) (b0->data + b0->current_data);
132 vlib_buffer_advance (b0, sizeof (e0[0]));
134 *type = clib_net_to_host_u16 (e0[0]);
137 // save for distinguishing between dot1q and dot1ad later
140 // default the tags to 0 (used if there is no corresponding tag)
144 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
147 // check for vlan encaps
148 if (ethernet_frame_is_tagged (*type))
150 ethernet_vlan_header_t *h0;
153 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
155 h0 = (void *) (b0->data + b0->current_data);
157 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
// vlan id is the low 12 bits; upper bits are priority/CFI
159 *outer_id = tag & 0xfff;
161 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
163 *type = clib_net_to_host_u16 (h0->type);
165 vlib_buffer_advance (b0, sizeof (h0[0]));
168 if (*type == ETHERNET_TYPE_VLAN)
170 // Double tagged packet
171 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
173 h0 = (void *) (b0->data + b0->current_data);
175 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
177 *inner_id = tag & 0xfff;
179 *type = clib_net_to_host_u16 (h0->type);
181 vlib_buffer_advance (b0, sizeof (h0[0]));
183 if (*type == ETHERNET_TYPE_VLAN)
185 // More than double tagged packet
186 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
188 vlib_buffer_advance (b0, sizeof (h0[0]));
189 vlan_count = 3; // "unknown" number, aka, 3-or-more
193 ethernet_buffer_set_vlan_count (b0, vlan_count);
196 // Determine the subinterface for this packet, given the result of the
197 // vlan table lookups and vlan header parsing. Check the most specific
//
// Delegates the subinterface match to eth_identify_subint (which fills
// *new_sw_if_index, *error0 and *is_l2), then applies the L3 my-mac filter
// and the subinterface admin-down check.
199 static_always_inline void
200 identify_subint (vnet_hw_interface_t * hi,
203 main_intf_t * main_intf,
204 vlan_intf_t * vlan_intf,
205 qinq_intf_t * qinq_intf,
206 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
210 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
211 qinq_intf, new_sw_if_index, error0, is_l2);
216 // Perform L3 my-mac filter
217 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
218 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
221 ethernet_header_t *e0;
222 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
// multicast/broadcast dmacs bypass the my-mac filter
224 if (!(ethernet_address_cast (e0->dst_address)))
226 if (!eth_mac_equal ((u8 *) e0, hi->hw_address))
228 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
233 // Check for down subinterface
// ~0 sw_if_index means no matching (up) subinterface was found
234 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
// Select the next node for the packet based on error state, L2/L3 mode and
// innermost ethertype, and finalize L3 header offset / L2 length metadata.
238 static_always_inline void
239 determine_next_node (ethernet_main_t * em,
240 ethernet_input_variant_t variant,
242 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
244 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
245 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
247 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
249 // some error occurred
250 *next0 = ETHERNET_INPUT_NEXT_DROP;
254 // record the L2 len and reset the buffer so the L2 header is preserved
255 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
256 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
257 *next0 = em->l2_next;
258 ASSERT (vnet_buffer (b0)->l2.l2_len ==
259 ethernet_buffer_header_size (b0));
// rewind so the L2 forwarding path sees the full ethernet header
260 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
262 // check for common IP/MPLS ethertypes
264 else if (type0 == ETHERNET_TYPE_IP4)
266 *next0 = em->l3_next.input_next_ip4;
268 else if (type0 == ETHERNET_TYPE_IP6)
270 *next0 = em->l3_next.input_next_ip6;
272 else if (type0 == ETHERNET_TYPE_MPLS)
274 *next0 = em->l3_next.input_next_mpls;
277 else if (em->redirect_l3)
279 // L3 Redirect is on, the cached common next nodes will be
280 // pointing to the redirect node, catch the uncommon types here
281 *next0 = em->redirect_l3_next;
285 // uncommon ethertype, check table
287 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
288 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
291 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
293 // The table is not populated with LLC values, so check that now.
294 // If variant is variant_ethernet then we came from LLC processing. Don't
295 // go back there; drop instead by keeping the drop/bad table result.
296 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
298 *next0 = ETHERNET_INPUT_NEXT_LLC;
304 /* following vector code relies on following assumptions */
305 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
308 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
309 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
310 "l3_hdr_offset must follow l2_hdr_offset");
// Advance 4 buffers past the ethernet header (L3 mode only), set the
// l2/l3 header offsets and offset-valid flags; in L2 mode also set l2_len.
// Has a vectorized (AVX2) path and a scalar fallback.
312 static_always_inline void
313 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
315 i16 adv = sizeof (ethernet_header_t);
316 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
317 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
319 #ifdef CLIB_HAVE_VEC256
320 /* to reduce number of small loads/stores we are loading first 64 bits
321 of each buffer metadata into 256-bit register so we can advance
322 current_data, current_length and flags.
323 Observed saving of this code is ~2 clocks per packet */
326 /* vector of signed 16 bit integers used in signed vector add operation
327 to advance current_data and current_length */
328 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
330 adv, -adv, 0, 0, adv, -adv, 0, 0,
331 adv, -adv, 0, 0, adv, -adv, 0, 0
334 /* load 4 x 64 bits */
335 r = u64x4_gather (b[0], b[1], b[2], b[3]);
// current_data += adv, current_length -= adv, done as one 16-bit lane add
341 radv = (u64x4) ((i16x16) r + adv4);
343 /* write 4 x 64 bits */
// only L3 mode actually advances the buffers (see scalar path below)
344 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
346 /* use old current_data as l2_hdr_offset and new current_data as
348 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
350 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
351 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
352 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
// sanity-check the packed stores against the per-field view
358 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
359 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
360 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
361 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
363 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
364 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
365 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
366 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
370 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
371 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
372 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
373 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
375 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
376 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
377 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
378 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
// scalar fallback: same metadata updates without SIMD
382 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
383 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
384 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
385 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
386 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
387 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
388 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
389 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
393 vlib_buffer_advance (b[0], adv);
394 vlib_buffer_advance (b[1], adv);
395 vlib_buffer_advance (b[2], adv);
396 vlib_buffer_advance (b[3], adv);
399 b[0]->flags |= flags;
400 b[1]->flags |= flags;
401 b[2]->flags |= flags;
402 b[3]->flags |= flags;
// in L2 mode record the ethernet header length for the L2 path
407 vnet_buffer (b[0])->l2.l2_len = adv;
408 vnet_buffer (b[1])->l2.l2_len = adv;
409 vnet_buffer (b[2])->l2.l2_len = adv;
410 vnet_buffer (b[3])->l2.l2_len = adv;
// Single-buffer variant of eth_input_adv_and_flags_x4: set l2/l3 header
// offsets and flags, advance past the ethernet header (L3 mode), record
// l2_len (L2 mode).
414 static_always_inline void
415 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
417 i16 adv = sizeof (ethernet_header_t);
418 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
419 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
421 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
422 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
425 vlib_buffer_advance (b[0], adv);
426 b[0]->flags |= flags;
428 vnet_buffer (b[0])->l2.l2_len = adv;
// Extract ethertype (network byte order), the 8 bytes following the
// ethernet header (potential vlan tags), and optionally the first 8 bytes
// (containing the destination MAC) for buffer b[offset] into the
// per-frame arrays.
432 static_always_inline void
433 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
434 u64 * dmacs, int offset, int dmac_check)
436 ethernet_header_t *e;
437 e = vlib_buffer_get_current (b[offset]);
438 #ifdef CLIB_HAVE_VEC128
// single 16-byte unaligned load covering dmac..type; type is lane 3
439 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
440 etype[offset] = ((u16x8) r)[3];
443 etype[offset] = e->type;
// 8 bytes right after the ethernet header - enough for two vlan tags
444 tags[offset] = *(u64 *) (e + 1);
// dmac is the first 6 bytes; 2 extra bytes are masked off by the caller
448 dmacs[offset] = *(u64 *) e;
// Map an ethertype (host byte order) to the next-node index: values below
// 0x600 are 802.3 lengths, not ethertypes, and go to llc-input; everything
// else is looked up in the sparse per-type next-node table.
451 static_always_inline u16
452 eth_input_next_by_type (u16 etype)
454 ethernet_main_t *em = &ethernet_main;
456 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
457 vec_elt (em->l3_next.input_next_by_type,
458 sparse_vec_index (em->l3_next.input_next_by_type, etype));
468 u64 n_packets, n_bytes;
469 } eth_input_tag_lookup_t;
// Flush the per-lookup packet/byte accumulators into the sw-interface RX
// combined counter. No-op when nothing was counted or no interface matched.
471 static_always_inline void
472 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
473 eth_input_tag_lookup_t * l)
475 if (l->n_packets == 0 || l->sw_if_index == ~0)
// n_bytes accumulates per-packet deltas; add the fixed per-packet header len
479 l->n_bytes += l->n_packets * l->len;
481 vlib_increment_combined_counter
482 (vnm->interface_main.combined_sw_if_counters +
483 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
484 l->n_packets, l->n_bytes);
// Slow-path handler for a tagged packet: resolve the (dot1q or dot1ad)
// vlan tag(s) to a subinterface, pick the next node, fix up buffer
// metadata and accumulate counters. The eth_input_tag_lookup_t cache 'l'
// memoizes the last tag seen so consecutive packets with the same tag
// skip the table lookups entirely.
487 static_always_inline void
488 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
489 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
490 u64 tag, u16 * next, vlib_buffer_t * b,
491 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
492 int main_is_l3, int check_dmac)
494 ethernet_main_t *em = &ethernet_main;
// cache miss: tag differs (under the cached mask) from the last lookup
496 if ((tag ^ l->tag) & l->mask)
498 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
501 vlan_table_t *vlan_table;
502 qinq_table_t *qinq_table;
// view the 8 raw tag bytes as 4 big-endian u16s: id, type, id, type
503 u16 *t = (u16 *) & tag;
504 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
505 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
506 u32 matched, is_l2, new_sw_if_index;
508 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
509 mif->dot1ad_vlans : mif->dot1q_vlans);
510 vif = &vlan_table->vlans[vlan1];
511 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
512 qif = &qinq_table->vlans[vlan2];
513 l->err = ETHERNET_ERROR_NONE;
514 l->type = clib_net_to_host_u16 (t[1]);
// inner tag present: double-tagged, use the qinq match
516 if (l->type == ETHERNET_TYPE_VLAN)
518 l->type = clib_net_to_host_u16 (t[3]);
520 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
521 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
522 qif, &new_sw_if_index, &l->err,
530 new_sw_if_index = hi->sw_if_index;
531 l->err = ETHERNET_ERROR_NONE;
533 is_l2 = main_is_l3 == 0;
// single-tagged: match against the 1-tag subinterface config
536 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
537 SUBINT_CONFIG_MATCH_1_TAG, mif,
538 vif, qif, &new_sw_if_index,
// interface changed: flush counters accumulated for the previous one
542 if (l->sw_if_index != new_sw_if_index)
544 eth_input_update_if_counters (vm, vnm, l);
547 l->sw_if_index = new_sw_if_index;
// cache mask covers one tag (outer 32 bits) or both tags (all 64 bits)
550 l->mask = (l->n_tags == 2) ?
551 clib_net_to_host_u64 (0xffffffffffffffff) :
552 clib_net_to_host_u64 (0xffffffff00000000);
554 if (matched && l->sw_if_index == ~0)
555 l->err = ETHERNET_ERROR_DOWN;
557 l->len = sizeof (ethernet_header_t) +
558 l->n_tags * sizeof (ethernet_vlan_header_t);
// L2: rewind over the ethernet header; L3: skip the vlan tags
560 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
561 l->n_tags * sizeof (ethernet_vlan_header_t);
563 l->adv = is_l2 ? 0 : l->len;
565 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
566 l->next = ETHERNET_INPUT_NEXT_DROP;
568 l->next = em->l2_next;
569 else if (l->type == ETHERNET_TYPE_IP4)
570 l->next = em->l3_next.input_next_ip4;
571 else if (l->type == ETHERNET_TYPE_IP6)
572 l->next = em->l3_next.input_next_ip6;
573 else if (l->type == ETHERNET_TYPE_MPLS)
574 l->next = em->l3_next.input_next_mpls;
575 else if (em->redirect_l3)
576 l->next = em->redirect_l3_next;
579 l->next = eth_input_next_by_type (l->type);
580 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
581 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
// positive adv means L3 path - apply the my-mac filter result here
585 if (check_dmac && l->adv > 0 && dmac_bad)
587 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
588 next[0] = ETHERNET_INPUT_NEXT_PUNT;
593 vlib_buffer_advance (b, l->adv);
594 vnet_buffer (b)->l2.l2_len = l->len;
595 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
597 if (l->err == ETHERNET_ERROR_NONE)
599 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
600 ethernet_buffer_set_vlan_count (b, l->n_tags);
603 b->error = node->errors[l->err];
605 /* update counters */
607 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
610 /* process frame of buffers, store ethertype into array and update
611 buffer metadata fields depending on interface being l2 or l3 assuming that
612 packets are untagged. For tagged packets those fields are updated later.
613 Optionally store Destination MAC address and tag data into arrays
614 for further processing */
616 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
617 "VLIB_FRAME_SIZE must be power of 8");
// Per-frame fast path: gather ethertype/tag/dmac data for all packets,
// classify the well-known ethertypes with SIMD where available, run any
// tagged/unknown-type packets through the slow path, then enqueue.
618 static_always_inline void
619 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
620 vnet_hw_interface_t * hi,
621 u32 * buffer_indices, u32 n_packets, int main_is_l3,
622 int ip4_cksum_ok, int dmac_check)
624 ethernet_main_t *em = &ethernet_main;
625 u16 nexts[VLIB_FRAME_SIZE], *next;
626 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
627 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
628 u8 dmacs_bad[VLIB_FRAME_SIZE];
629 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
630 u16 slowpath_indices[VLIB_FRAME_SIZE];
632 u16 next_ip4, next_ip6, next_mpls, next_l2;
633 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
634 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
635 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
636 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
637 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
638 i32 n_left = n_packets;
639 vlib_buffer_t *b[20];
642 from = buffer_indices;
// main quad loop with two prefetch stages: ph = headers 16 ahead,
// pd = packet data 8 ahead
646 vlib_buffer_t **ph = b + 16, **pd = b + 8;
647 vlib_get_buffers (vm, from, b, 4);
648 vlib_get_buffers (vm, from + 8, pd, 4);
649 vlib_get_buffers (vm, from + 16, ph, 4);
651 vlib_prefetch_buffer_header (ph[0], LOAD);
652 vlib_prefetch_buffer_data (pd[0], LOAD);
653 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
655 vlib_prefetch_buffer_header (ph[1], LOAD);
656 vlib_prefetch_buffer_data (pd[1], LOAD);
657 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
659 vlib_prefetch_buffer_header (ph[2], LOAD);
660 vlib_prefetch_buffer_data (pd[2], LOAD);
661 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
663 vlib_prefetch_buffer_header (ph[3], LOAD);
664 vlib_prefetch_buffer_data (pd[3], LOAD);
665 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
667 eth_input_adv_and_flags_x4 (b, main_is_l3);
// tail: quads without prefetch, then singles
678 vlib_get_buffers (vm, from, b, 4);
679 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
680 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
681 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
682 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
683 eth_input_adv_and_flags_x4 (b, main_is_l3);
694 vlib_get_buffers (vm, from, b, 1);
695 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
696 eth_input_adv_and_flags_x1 (b, main_is_l3);
// dmac filter: mark packets whose unicast dmac (masked to 6 bytes)
// differs from the interface address; I/G bit set means group address
708 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
709 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
710 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
712 u8 *dmac_bad = dmacs_bad;
716 #ifdef CLIB_HAVE_VEC256
717 u64x4 igbit4 = u64x4_splat (igbit);
718 u64x4 mask4 = u64x4_splat (mask);
719 u64x4 hwaddr4 = u64x4_splat (hwaddr);
723 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
724 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
726 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
727 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
// compress the lane-wide compare results to one "bad" byte per packet
729 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
730 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
// scalar fallback for the same check
746 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
747 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
748 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
749 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
764 next_ip4 = em->l3_next.input_next_ip4;
765 next_ip6 = em->l3_next.input_next_ip6;
766 next_mpls = em->l3_next.input_next_mpls;
767 next_l2 = em->l2_next;
// device verified the ip4 checksum - skip the checksum node
769 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
770 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
772 #ifdef CLIB_HAVE_VEC256
773 u16x16 et16_ip4 = u16x16_splat (et_ip4);
774 u16x16 et16_ip6 = u16x16_splat (et_ip6);
775 u16x16 et16_mpls = u16x16_splat (et_mpls);
776 u16x16 et16_vlan = u16x16_splat (et_vlan);
777 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
778 u16x16 next16_ip4 = u16x16_splat (next_ip4);
779 u16x16 next16_ip6 = u16x16_splat (next_ip6);
780 u16x16 next16_mpls = u16x16_splat (next_mpls);
781 u16x16 next16_l2 = u16x16_splat (next_l2);
783 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
792 /* fastpath - in l3 mode handles ip4, ip6 and mpls packets, other packets
793 are considered as slowpath, in l2 mode all untagged packets are
794 considered as fastpath */
797 #ifdef CLIB_HAVE_VEC256
// classify 16 ethertypes at once; a zero next means slowpath
801 u16x16 e16 = u16x16_load_unaligned (etype);
804 r += (e16 == et16_ip4) & next16_ip4;
805 r += (e16 == et16_ip6) & next16_ip6;
806 r += (e16 == et16_mpls) & next16_mpls;
809 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
810 u16x16_store_unaligned (r, next);
812 if (!u16x16_is_all_zero (r == zero))
814 if (u16x16_is_all_zero (r))
// all 16 are slowpath: record their indices in one shot
816 u16x16_store_unaligned (u16x16_splat (i) + stairs,
817 slowpath_indices + n_slowpath);
// mixed batch: record only the zero (slowpath) lanes
822 for (int j = 0; j < 16; j++)
824 slowpath_indices[n_slowpath++] = i + j;
// scalar classification, one packet at a time
835 if (main_is_l3 && etype[0] == et_ip4)
837 else if (main_is_l3 && etype[0] == et_ip6)
839 else if (main_is_l3 && etype[0] == et_mpls)
841 else if (main_is_l3 == 0 &&
842 etype[0] != et_vlan && etype[0] != et_dot1ad)
847 slowpath_indices[n_slowpath++] = i;
// slowpath: tagged packets (cached dot1q/dot1ad tag lookups) and
// unknown ethertypes (memoize the last unknown type seen)
858 vnet_main_t *vnm = vnet_get_main ();
860 u16 *si = slowpath_indices;
861 u32 last_unknown_etype = ~0;
862 u32 last_unknown_next = ~0;
863 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
// seed the cache with a tag that cannot match the first packet
865 .tag = tags[si[0]] ^ -1LL,
869 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
874 u16 etype = etypes[i];
876 if (etype == et_vlan)
878 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
879 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
880 &dot1q_lookup, dmacs_bad[i], 0,
881 main_is_l3, dmac_check);
884 else if (etype == et_dot1ad)
886 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
887 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
888 &dot1ad_lookup, dmacs_bad[i], 1,
889 main_is_l3, dmac_check);
893 /* untagged packet with not well known ethertype */
894 if (last_unknown_etype != etype)
896 last_unknown_etype = etype;
897 etype = clib_host_to_net_u16 (etype);
898 last_unknown_next = eth_input_next_by_type (etype);
900 if (dmac_check && main_is_l3 && dmacs_bad[i])
902 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
903 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
904 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
907 nexts[i] = last_unknown_next;
// flush any counters accumulated by the tag-lookup caches
915 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
916 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
919 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
// Entry point for frames known to come from a single interface: decide
// whether the untagged subinterface is L2 or L3 and whether the dmac
// filter is needed (L3 + promisc/subinterfaces), then dispatch to
// eth_input_process_frame with the matching compile-time flags.
922 static_always_inline void
923 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
924 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
927 ethernet_main_t *em = &ethernet_main;
928 ethernet_interface_t *ei;
929 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
930 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
931 subint_config_t *subint0 = &intf0->untagged_subint;
933 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
934 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
938 /* main interface is L3, we dont expect tagged packets and interface
939 is not in promisc mode, so we don't need to check DMAC */
943 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
946 /* subinterfaces and promisc mode so DMAC check is needed */
947 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
953 /* untagged packets are treated as L2 */
955 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
// Record packet traces for the frame: for each traced buffer, capture the
// leading packet bytes plus the frame's flags and scalar (if-index) data.
// Early-out when tracing is disabled on the node.
961 static_always_inline void
962 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
963 vlib_frame_t * from_frame)
966 if ((node->flags & VLIB_NODE_FLAG_TRACE) == 0)
969 from = vlib_frame_vector_args (from_frame);
970 n_left = from_frame->n_vectors;
974 ethernet_input_trace_t *t0;
975 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
977 if (b0->flags & VLIB_BUFFER_IS_TRACED)
979 t0 = vlib_add_trace (vm, node, b0, sizeof (ethernet_input_trace_t));
980 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
981 sizeof (t0->packet_data));
982 t0->frame_flags = from_frame->flags;
// frame scalar args hold the ethernet_input_frame_t (hw/sw if-index)
983 clib_memcpy_fast (&t0->frame_data,
984 vlib_frame_scalar_args (from_frame),
985 sizeof (ethernet_input_frame_t));
992 static_always_inline void
993 ethernet_input_inline (vlib_main_t * vm,
994 vlib_node_runtime_t * node,
995 u32 * from, u32 n_packets,
996 ethernet_input_variant_t variant)
998 vnet_main_t *vnm = vnet_get_main ();
999 ethernet_main_t *em = ðernet_main;
1000 vlib_node_runtime_t *error_node;
1001 u32 n_left_from, next_index, *to_next;
1002 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1003 u32 thread_index = vm->thread_index;
1004 u32 cached_sw_if_index = ~0;
1005 u32 cached_is_l2 = 0; /* shut up gcc */
1006 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1008 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1009 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1013 n_left_from = n_packets;
1015 next_index = node->cached_next_index;
1016 stats_sw_if_index = node->runtime_data[0];
1017 stats_n_packets = stats_n_bytes = 0;
1019 while (n_left_from > 0)
1023 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1025 while (n_left_from >= 4 && n_left_to_next >= 2)
1028 vlib_buffer_t *b0, *b1;
1029 u8 next0, next1, error0, error1;
1030 u16 type0, orig_type0, type1, orig_type1;
1031 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1032 u32 match_flags0, match_flags1;
1033 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1034 new_sw_if_index1, len1;
1035 vnet_hw_interface_t *hi0, *hi1;
1036 main_intf_t *main_intf0, *main_intf1;
1037 vlan_intf_t *vlan_intf0, *vlan_intf1;
1038 qinq_intf_t *qinq_intf0, *qinq_intf1;
1040 ethernet_header_t *e0, *e1;
1042 /* Prefetch next iteration. */
1044 vlib_buffer_t *b2, *b3;
1046 b2 = vlib_get_buffer (vm, from[2]);
1047 b3 = vlib_get_buffer (vm, from[3]);
1049 vlib_prefetch_buffer_header (b2, STORE);
1050 vlib_prefetch_buffer_header (b3, STORE);
1052 CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD);
1053 CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD);
1062 n_left_to_next -= 2;
1065 b0 = vlib_get_buffer (vm, bi0);
1066 b1 = vlib_get_buffer (vm, bi1);
1068 error0 = error1 = ETHERNET_ERROR_NONE;
1069 e0 = vlib_buffer_get_current (b0);
1070 type0 = clib_net_to_host_u16 (e0->type);
1071 e1 = vlib_buffer_get_current (b1);
1072 type1 = clib_net_to_host_u16 (e1->type);
1074 /* Set the L2 header offset for all packets */
1075 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1076 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1077 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1078 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1080 /* Speed-path for the untagged case */
1081 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1082 && !ethernet_frame_is_any_tagged_x2 (type0,
1086 subint_config_t *subint0;
1087 u32 sw_if_index0, sw_if_index1;
1089 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1090 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1091 is_l20 = cached_is_l2;
1093 /* This is probably wholly unnecessary */
1094 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1097 /* Now sw_if_index0 == sw_if_index1 */
1098 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1100 cached_sw_if_index = sw_if_index0;
1101 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1102 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1103 subint0 = &intf0->untagged_subint;
1104 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1107 if (PREDICT_TRUE (is_l20 != 0))
1109 vnet_buffer (b0)->l3_hdr_offset =
1110 vnet_buffer (b0)->l2_hdr_offset +
1111 sizeof (ethernet_header_t);
1112 vnet_buffer (b1)->l3_hdr_offset =
1113 vnet_buffer (b1)->l2_hdr_offset +
1114 sizeof (ethernet_header_t);
1115 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1116 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1117 next0 = em->l2_next;
1118 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1119 next1 = em->l2_next;
1120 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1124 if (!ethernet_address_cast (e0->dst_address) &&
1125 (hi->hw_address != 0) &&
1126 !eth_mac_equal ((u8 *) e0, hi->hw_address))
1127 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1128 if (!ethernet_address_cast (e1->dst_address) &&
1129 (hi->hw_address != 0) &&
1130 !eth_mac_equal ((u8 *) e1, hi->hw_address))
1131 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1132 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1133 determine_next_node (em, variant, 0, type0, b0,
1135 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1136 determine_next_node (em, variant, 0, type1, b1,
1142 /* Slow-path for the tagged case */
1144 parse_header (variant,
1147 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1149 parse_header (variant,
1152 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1154 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1155 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1157 eth_vlan_table_lookups (em,
1164 &main_intf0, &vlan_intf0, &qinq_intf0);
1166 eth_vlan_table_lookups (em,
1173 &main_intf1, &vlan_intf1, &qinq_intf1);
1175 identify_subint (hi0,
1180 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1182 identify_subint (hi1,
1187 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1189 // Save RX sw_if_index for later nodes
1190 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1192 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1193 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1195 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1197 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1198 if (((new_sw_if_index0 != ~0)
1199 && (new_sw_if_index0 != old_sw_if_index0))
1200 || ((new_sw_if_index1 != ~0)
1201 && (new_sw_if_index1 != old_sw_if_index1)))
1204 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1205 - vnet_buffer (b0)->l2_hdr_offset;
1206 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1207 - vnet_buffer (b1)->l2_hdr_offset;
1209 stats_n_packets += 2;
1210 stats_n_bytes += len0 + len1;
1213 (!(new_sw_if_index0 == stats_sw_if_index
1214 && new_sw_if_index1 == stats_sw_if_index)))
1216 stats_n_packets -= 2;
1217 stats_n_bytes -= len0 + len1;
1219 if (new_sw_if_index0 != old_sw_if_index0
1220 && new_sw_if_index0 != ~0)
1221 vlib_increment_combined_counter (vnm->
1222 interface_main.combined_sw_if_counters
1224 VNET_INTERFACE_COUNTER_RX,
1226 new_sw_if_index0, 1,
1228 if (new_sw_if_index1 != old_sw_if_index1
1229 && new_sw_if_index1 != ~0)
1230 vlib_increment_combined_counter (vnm->
1231 interface_main.combined_sw_if_counters
1233 VNET_INTERFACE_COUNTER_RX,
1235 new_sw_if_index1, 1,
1238 if (new_sw_if_index0 == new_sw_if_index1)
1240 if (stats_n_packets > 0)
1242 vlib_increment_combined_counter
1243 (vnm->interface_main.combined_sw_if_counters
1244 + VNET_INTERFACE_COUNTER_RX,
1247 stats_n_packets, stats_n_bytes);
1248 stats_n_packets = stats_n_bytes = 0;
1250 stats_sw_if_index = new_sw_if_index0;
1255 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1256 is_l20 = is_l21 = 0;
1258 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1260 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1264 b0->error = error_node->errors[error0];
1265 b1->error = error_node->errors[error1];
1267 // verify speculative enqueue
1268 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1269 n_left_to_next, bi0, bi1, next0,
1273 while (n_left_from > 0 && n_left_to_next > 0)
1278 u16 type0, orig_type0;
1279 u16 outer_id0, inner_id0;
1281 u32 old_sw_if_index0, new_sw_if_index0, len0;
1282 vnet_hw_interface_t *hi0;
1283 main_intf_t *main_intf0;
1284 vlan_intf_t *vlan_intf0;
1285 qinq_intf_t *qinq_intf0;
1286 ethernet_header_t *e0;
1289 // Prefetch next iteration
1290 if (n_left_from > 1)
1294 p2 = vlib_get_buffer (vm, from[1]);
1295 vlib_prefetch_buffer_header (p2, STORE);
1296 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
1304 n_left_to_next -= 1;
1306 b0 = vlib_get_buffer (vm, bi0);
1308 error0 = ETHERNET_ERROR_NONE;
1309 e0 = vlib_buffer_get_current (b0);
1310 type0 = clib_net_to_host_u16 (e0->type);
1312 /* Set the L2 header offset for all packets */
1313 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1314 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1316 /* Speed-path for the untagged case */
1317 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1318 && !ethernet_frame_is_tagged (type0)))
1321 subint_config_t *subint0;
1324 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1325 is_l20 = cached_is_l2;
1327 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1329 cached_sw_if_index = sw_if_index0;
1330 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1331 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1332 subint0 = &intf0->untagged_subint;
1333 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1337 if (PREDICT_TRUE (is_l20 != 0))
1339 vnet_buffer (b0)->l3_hdr_offset =
1340 vnet_buffer (b0)->l2_hdr_offset +
1341 sizeof (ethernet_header_t);
1342 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1343 next0 = em->l2_next;
1344 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1348 if (!ethernet_address_cast (e0->dst_address) &&
1349 (hi->hw_address != 0) &&
1350 !eth_mac_equal ((u8 *) e0, hi->hw_address))
1351 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1352 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1353 determine_next_node (em, variant, 0, type0, b0,
1359 /* Slow-path for the tagged case */
1360 parse_header (variant,
1363 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1365 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1367 eth_vlan_table_lookups (em,
1374 &main_intf0, &vlan_intf0, &qinq_intf0);
1376 identify_subint (hi0,
1381 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1383 // Save RX sw_if_index for later nodes
1384 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1386 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1388 // Increment subinterface stats
1389 // Note that interface-level counters have already been incremented
1390 // prior to calling this function. Thus only subinterface counters
1391 // are incremented here.
1393 // Interface level counters include packets received on the main
1394 // interface and all subinterfaces. Subinterface level counters
1395 // include only those packets received on that subinterface
1396 // Increment stats if the subint is valid and it is not the main intf
1397 if ((new_sw_if_index0 != ~0)
1398 && (new_sw_if_index0 != old_sw_if_index0))
1401 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1402 - vnet_buffer (b0)->l2_hdr_offset;
1404 stats_n_packets += 1;
1405 stats_n_bytes += len0;
1407 // Batch stat increments from the same subinterface so counters
1408 // don't need to be incremented for every packet.
1409 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1411 stats_n_packets -= 1;
1412 stats_n_bytes -= len0;
1414 if (new_sw_if_index0 != ~0)
1415 vlib_increment_combined_counter
1416 (vnm->interface_main.combined_sw_if_counters
1417 + VNET_INTERFACE_COUNTER_RX,
1418 thread_index, new_sw_if_index0, 1, len0);
1419 if (stats_n_packets > 0)
1421 vlib_increment_combined_counter
1422 (vnm->interface_main.combined_sw_if_counters
1423 + VNET_INTERFACE_COUNTER_RX,
1425 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1426 stats_n_packets = stats_n_bytes = 0;
1428 stats_sw_if_index = new_sw_if_index0;
1432 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1435 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1439 b0->error = error_node->errors[error0];
1441 // verify speculative enqueue
1442 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1443 to_next, n_left_to_next,
1447 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1450 // Increment any remaining batched stats
1451 if (stats_n_packets > 0)
1453 vlib_increment_combined_counter
1454 (vnm->interface_main.combined_sw_if_counters
1455 + VNET_INTERFACE_COUNTER_RX,
1456 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1457 node->runtime_data[0] = stats_sw_if_index;
/*
 * Dispatch function for the "ethernet-input" graph node.
 *
 * Fast path: if the frame carries ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX
 * (all packets are from one hw interface, per the frame scalar args),
 * hand the whole vector to eth_input_single_int, forwarding the
 * driver's ip4-checksum-already-verified hint. Otherwise fall back to
 * the generic ethernet_input_inline with the plain ETHERNET variant.
 */
1461 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1462 vlib_node_runtime_t * node,
1463 vlib_frame_t * frame)
1465 vnet_main_t *vnm = vnet_get_main ();
1466 u32 *from = vlib_frame_vector_args (frame);
1467 u32 n_packets = frame->n_vectors;
/* Record packet traces before any header rewriting happens */
1469 ethernet_input_trace (vm, node, frame);
1471 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
1473 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1474 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1475 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1476 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
1479 ethernet_input_inline (vm, node, from, n_packets,
1480 ETHERNET_INPUT_VARIANT_ETHERNET);
/*
 * Dispatch function for the "ethernet-input-type" graph node: same
 * trace + inline processing as ethernet-input, but runs the inline
 * with the ETHERNET_TYPE parse variant.
 */
1484 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1485 vlib_node_runtime_t * node,
1486 vlib_frame_t * from_frame)
1488 u32 *from = vlib_frame_vector_args (from_frame);
1489 u32 n_packets = from_frame->n_vectors;
1490 ethernet_input_trace (vm, node, from_frame);
1491 ethernet_input_inline (vm, node, from, n_packets,
1492 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
/*
 * Dispatch function for the "ethernet-input-not-l2" graph node: same
 * trace + inline processing as ethernet-input, but runs the inline
 * with the NOT_L2 variant (L2 next-node selection suppressed).
 */
1496 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1497 vlib_node_runtime_t * node,
1498 vlib_frame_t * from_frame)
1500 u32 *from = vlib_frame_vector_args (from_frame);
1501 u32 n_packets = from_frame->n_vectors;
1502 ethernet_input_trace (vm, node, from_frame);
1503 ethernet_input_inline (vm, node, from, n_packets,
1504 ETHERNET_INPUT_VARIANT_NOT_L2);
1509 // Return the subinterface config struct for the given sw_if_index
1510 // Also return via parameter the appropriate match flags for the
1511 // configured number of tags.
1512 // On error (unsupported or not ethernet) return 0.
//
// Resolution order: P2P subif -> pipe subif -> default subif ->
// untagged/main subif -> tagged subifs (dot1q / dot1ad vlan tables,
// and a qinq table for two specific tags). Vlan/qinq tables are
// allocated lazily from em->vlan_pool / em->qinq_pool on first use.
1513 static subint_config_t *
1514 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1516 u32 * flags, u32 * unsupported)
// NOTE(review): was mojibake "ðernet_main" (mangled HTML entity
// for "&ethernet_main") — restored to take the address of the global.
1518 ethernet_main_t *em = &ethernet_main;
1519 vnet_hw_interface_t *hi;
1520 vnet_sw_interface_t *si;
1521 main_intf_t *main_intf;
1522 vlan_table_t *vlan_table;
1523 qinq_table_t *qinq_table;
1524 subint_config_t *subint = 0;
1526 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1528 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1531 goto done; // non-ethernet interface
1534 // ensure there's an entry for the main intf (shouldn't really be necessary)
1535 vec_validate (em->main_intfs, hi->hw_if_index);
1536 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1538 // Locate the subint for the given ethernet config
1539 si = vnet_get_sw_interface (vnm, sw_if_index);
1541 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1543 p2p_ethernet_main_t *p2pm = &p2p_main;
1544 u32 p2pe_sw_if_index =
1545 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
// First sighting of this client MAC: allocate a subif from the pool
// and remember its pool index on the sw interface.
1546 if (p2pe_sw_if_index == ~0)
1548 pool_get (p2pm->p2p_subif_pool, subint);
1549 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1552 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1553 *flags = SUBINT_CONFIG_P2P;
1555 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1559 pipe = pipe_get (sw_if_index);
1560 subint = &pipe->subint;
1561 *flags = SUBINT_CONFIG_P2P;
1563 else if (si->sub.eth.flags.default_sub)
// Default subif matches any tagged packet that nothing else claimed.
1565 subint = &main_intf->default_subint;
1566 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1567 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1569 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1571 // if no flags are set then this is a main interface
1572 // so treat as untagged
1573 subint = &main_intf->untagged_subint;
1574 *flags = SUBINT_CONFIG_MATCH_0_TAG;
1579 // first get the vlan table
1580 if (si->sub.eth.flags.dot1ad)
1582 if (main_intf->dot1ad_vlans == 0)
1584 // Allocate a vlan table from the pool
1585 pool_get (em->vlan_pool, vlan_table);
1586 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1590 // Get ptr to existing vlan table
1592 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
// dot1q path: lazily allocate the table the same way as dot1ad above.
1597 if (main_intf->dot1q_vlans == 0)
1599 // Allocate a vlan table from the pool
1600 pool_get (em->vlan_pool, vlan_table);
1601 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1605 // Get ptr to existing vlan table
1607 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
1611 if (si->sub.eth.flags.one_tag)
// exact_match restricts matching to exactly one tag; otherwise a
// single-tag subif also catches 2- and 3-tag packets.
1613 *flags = si->sub.eth.flags.exact_match ?
1614 SUBINT_CONFIG_MATCH_1_TAG :
1615 (SUBINT_CONFIG_MATCH_1_TAG |
1616 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1618 if (si->sub.eth.flags.outer_vlan_id_any)
1620 // not implemented yet
1626 // a single vlan, a common case
1628 &vlan_table->vlans[si->sub.eth.
1629 outer_vlan_id].single_tag_subint;
// Two-tag configuration from here down.
1636 *flags = si->sub.eth.flags.exact_match ?
1637 SUBINT_CONFIG_MATCH_2_TAG :
1638 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1640 if (si->sub.eth.flags.outer_vlan_id_any
1641 && si->sub.eth.flags.inner_vlan_id_any)
1643 // not implemented yet
1648 if (si->sub.eth.flags.inner_vlan_id_any)
1650 // a specific outer and "any" inner
1651 // don't need a qinq table for this
1653 &vlan_table->vlans[si->sub.eth.
1654 outer_vlan_id].inner_any_subint;
1655 if (si->sub.eth.flags.exact_match)
1657 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1661 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1662 SUBINT_CONFIG_MATCH_3_TAG;
1667 // a specific outer + specifc innner vlan id, a common case
1669 // get the qinq table
1670 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1672 // Allocate a qinq table from the pool
1673 pool_get (em->qinq_pool, qinq_table);
1674 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1675 qinq_table - em->qinq_pool;
1679 // Get ptr to existing qinq table
1681 vec_elt_at_index (em->qinq_pool,
1682 vlan_table->vlans[si->sub.
1686 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
/*
 * Admin up/down callback for ethernet sw interfaces: stamp the
 * subinterface config with the live sw_if_index when the interface
 * goes admin-up, or with ~0 when it goes down (the input path treats
 * ~0 as "down", see identify_subint error handling).
 */
1695 static clib_error_t *
1696 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1698 subint_config_t *subint;
1701 clib_error_t *error = 0;
1703 // Find the config for this subinterface
1705 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1710 // not implemented yet or not ethernet
1714 subint->sw_if_index =
1715 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1721 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1724 #ifndef CLIB_MARCH_VARIANT
1725 // Set the L2/L3 mode for the subinterface
// l2 != 0 selects L2 mode. For a port (non-sub interface) the match
// flags are widened/narrowed along with the L2 bit: L2 mode matches
// any tag depth, L3 mode keeps only 0-tag matching.
1727 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1729 subint_config_t *subint;
1733 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
// A "port" is anything that is not a subinterface.
1735 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1737 // Find the config for this subinterface
1739 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1744 // unimplemented or not ethernet
1748 // Double check that the config we found is for our interface (or the interface is down)
// Bitwise '|' on the two boolean comparisons — equivalent to '||'
// here since each operand is 0 or 1 (upstream idiom, kept as-is).
1749 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1753 subint->flags |= SUBINT_CONFIG_L2;
1756 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1757 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1761 subint->flags &= ~SUBINT_CONFIG_L2;
1764 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1765 | SUBINT_CONFIG_MATCH_3_TAG);
1773 * Set the L2/L3 mode for the subinterface regardless of port
// Variant of ethernet_sw_interface_set_l2_mode that only toggles the
// SUBINT_CONFIG_L2 bit and never touches the tag-match flags.
1776 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1777 u32 sw_if_index, u32 l2)
1779 subint_config_t *subint;
1783 /* Find the config for this subinterface */
1785 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1790 /* unimplemented or not ethernet */
1795 * Double check that the config we found is for our interface (or the
1796 * interface is down)
/* Bitwise '|' of booleans — same result as '||' (upstream idiom). */
1798 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1802 subint->flags |= SUBINT_CONFIG_L2;
1806 subint->flags &= ~SUBINT_CONFIG_L2;
/*
 * Add/del callback for ethernet sw interfaces. On create, claims the
 * matching subinterface slot (error if the vlan is already in use) and
 * initializes it VALID + match flags, L3 by default, admin-down (~0).
 */
1814 static clib_error_t *
1815 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1816 u32 sw_if_index, u32 is_create)
1818 clib_error_t *error = 0;
1819 subint_config_t *subint;
1821 u32 unsupported = 0;
1823 // Find the config for this subinterface
1825 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1830 // not implemented yet or not ethernet
1833 // this is the NYI case
1834 error = clib_error_return (0, "not implemented yet");
1845 // Initialize the subint
1846 if (subint->flags & SUBINT_CONFIG_VALID)
1848 // Error vlan already in use
1849 error = clib_error_return (0, "vlan is already in use");
1853 // Note that config is L3 by default
1854 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1855 subint->sw_if_index = ~0; // because interfaces are initially down
1862 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
/* Per-error human-readable strings, generated from error.def via the
 * ethernet_error() x-macro; indexed by the ethernet error enum. */
1864 static char *ethernet_error_strings[] = {
1865 #define ethernet_error(n,c,s) s,
1866 #include "error.def"
1867 #undef ethernet_error
/* Graph-node registration for "ethernet-input". Carries an
 * ethernet_input_frame_t as frame scalar args (single-interface hint)
 * and the full error/next-node/trace format hookup. */
1871 VLIB_REGISTER_NODE (ethernet_input_node) = {
1872 .name = "ethernet-input",
1873 /* Takes a vector of packets. */
1874 .vector_size = sizeof (u32),
1875 .scalar_size = sizeof (ethernet_input_frame_t),
1876 .n_errors = ETHERNET_N_ERROR,
1877 .error_strings = ethernet_error_strings,
1878 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1880 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1881 foreach_ethernet_input_next
1884 .format_buffer = format_ethernet_header_with_length,
1885 .format_trace = format_ethernet_input_trace,
1886 .unformat_buffer = unformat_ethernet_header,
/* Graph-node registration for "ethernet-input-type"; shares the same
 * next-node layout as ethernet-input (indices must stay aligned). */
1889 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1890 .name = "ethernet-input-type",
1891 /* Takes a vector of packets. */
1892 .vector_size = sizeof (u32),
1893 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1895 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1896 foreach_ethernet_input_next
/* Graph-node registration for "ethernet-input-not-l2"; same aligned
 * next-node layout as the other two ethernet input nodes. */
1901 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1902 .name = "ethernet-input-not-l2",
1903 /* Takes a vector of packets. */
1904 .vector_size = sizeof (u32),
1905 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1907 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1908 foreach_ethernet_input_next
1914 #ifndef CLIB_MARCH_VARIANT
/* Enable/disable rx redirect for one hw interface: when enabled, all
 * its packets are forced through ethernet-input; ~0 clears it. */
1916 ethernet_set_rx_redirect (vnet_main_t * vnm,
1917 vnet_hw_interface_t * hi, u32 enable)
1919 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1920 // don't go directly to ip4-input)
1921 vnet_hw_interface_rx_redirect_to_node
1922 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1927 * Initialization and registration for the next_by_ethernet structure
//
// Allocates the ethertype -> next-index sparse vector (indexed by the
// full 16-bit ethertype) and pre-seeds the inverse mapping so that the
// DROP and PUNT next indices map to SPARSE_VEC_INVALID_INDEX.
1931 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1933 l3_next->input_next_by_type = sparse_vec_new
1934 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1935 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1937 vec_validate (l3_next->sparse_index_by_input_next_index,
1938 ETHERNET_INPUT_NEXT_DROP);
1939 vec_validate (l3_next->sparse_index_by_input_next_index,
1940 ETHERNET_INPUT_NEXT_PUNT);
1941 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1942 SPARSE_VEC_INVALID_INDEX;
1943 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1944 SPARSE_VEC_INVALID_INDEX;
1947 * Make sure we don't wipe out an ethernet registration by mistake
1948 * Can happen if init function ordering constraints are missing.
// NOTE(review): was mojibake "ðernet_main" (mangled "&ethernet_main").
1952 ethernet_main_t *em = &ethernet_main;
1953 ASSERT (em->next_by_ethertype_register_called == 0);
1959 // Add an ethertype -> next index mapping to the structure
1961 next_by_ethertype_register (next_by_ethertype_t * l3_next,
1962 u32 ethertype, u32 next_index)
1966 ethernet_main_t *em = ðernet_main;
1970 ethernet_main_t *em = ðernet_main;
1971 em->next_by_ethertype_register_called = 1;
1974 /* Setup ethernet type -> next index sparse vector mapping. */
1975 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
1978 /* Rebuild next index -> sparse index inverse mapping when sparse vector
1980 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
1981 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
1983 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
1985 // do not allow the cached next index's to be updated if L3
1986 // redirect is enabled, as it will have overwritten them
1987 if (!em->redirect_l3)
1989 // Cache common ethertypes directly
1990 if (ethertype == ETHERNET_TYPE_IP4)
1992 l3_next->input_next_ip4 = next_index;
1994 else if (ethertype == ETHERNET_TYPE_IP6)
1996 l3_next->input_next_ip6 = next_index;
1998 else if (ethertype == ETHERNET_TYPE_MPLS)
2000 l3_next->input_next_mpls = next_index;
/*
 * Init function: wires up the three ethernet input nodes, initializes
 * the L3 ethertype dispatch structure, and pre-allocates the vlan/qinq
 * pools, reserving pool index 0 of each as the "invalid table" marker
 * (so a table index of 0 means "not allocated").
 */
2007 static clib_error_t *
2008 ethernet_input_init (vlib_main_t * vm)
// NOTE(review): was mojibake "ðernet_main" (mangled "&ethernet_main").
2010 ethernet_main_t *em = &ethernet_main;
2011 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2012 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2014 ethernet_setup_node (vm, ethernet_input_node.index);
2015 ethernet_setup_node (vm, ethernet_input_type_node.index);
2016 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2018 next_by_ethertype_init (&em->l3_next);
2020 // Initialize pools and vector for vlan parsing
2021 vec_validate (em->main_intfs, 10); // 10 main interfaces
2022 pool_alloc (em->vlan_pool, 10);
2023 pool_alloc (em->qinq_pool, 1);
2025 // The first vlan pool will always be reserved for an invalid table
2026 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2027 // The first qinq pool will always be reserved for an invalid table
2028 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
2033 VLIB_INIT_FUNCTION (ethernet_input_init);
//
// Register a node to receive packets of a given ethertype. Adds the
// arc from all three ethernet input nodes (asserting the next indices
// stay aligned), records it in the L3 dispatch structure, and notifies
// l2bvi so its mapping stays in sync.
//
2036 ethernet_register_input_type (vlib_main_t * vm,
2037 ethernet_type_t type, u32 node_index)
// NOTE(review): was mojibake "ðernet_main" (mangled "&ethernet_main").
2039 ethernet_main_t *em = &ethernet_main;
2040 ethernet_type_info_t *ti;
2044 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2046 clib_error_report (error);
2049 ti = ethernet_get_type_info (em, type);
2050 ti->node_index = node_index;
2051 ti->next_index = vlib_node_add_next (vm,
2052 ethernet_input_node.index, node_index);
// The three input nodes must agree on next indices.
2053 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2054 ASSERT (i == ti->next_index);
2056 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2057 ASSERT (i == ti->next_index);
2059 // Add the L3 node for this ethertype to the next nodes structure
2060 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2062 // Call the registration functions for other nodes that want a mapping
2063 l2bvi_register_input_type (vm, type, node_index);
//
// Register the node that receives all L2-mode packets, adding the arc
// from each of the three ethernet input nodes with matching indices.
//
2067 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
// NOTE(review): was mojibake "ðernet_main" (mangled "&ethernet_main").
2069 ethernet_main_t *em = &ethernet_main;
2073 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2076 * Even if we never use these arcs, we have to align the next indices...
2078 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2080 ASSERT (i == em->l2_next);
2082 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2083 ASSERT (i == em->l2_next);
2086 // Register a next node for L3 redirect, and enable L3 redirect
//
// After this, all cached IP4/IP6/MPLS next indices point at the
// redirect node; next_by_ethertype_register will refuse to overwrite
// them while em->redirect_l3 is set.
//
2088 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
// NOTE(review): was mojibake "ðernet_main" (mangled "&ethernet_main").
2090 ethernet_main_t *em = &ethernet_main;
2093 em->redirect_l3 = 1;
2094 em->redirect_l3_next = vlib_node_add_next (vm,
2095 ethernet_input_node.index,
2098 * Change the cached next nodes to the redirect node
2100 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2101 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2102 em->l3_next.input_next_mpls = em->redirect_l3_next;
2105 * Even if we never use these arcs, we have to align the next indices...
2107 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2109 ASSERT (i == em->redirect_l3_next);
2111 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2113 ASSERT (i == em->redirect_l3_next);
2118 * fd.io coding-style-patch-verification: ON
2121 * eval: (c-set-style "gnu")