2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
/* X-macro list of ethernet-input next nodes: each entry expands via
   _ (enum-suffix, graph-node-name).  Order defines the next-index
   numbering used by ETHERNET_INPUT_NEXT_* below — do not reorder. */
#define foreach_ethernet_input_next \
_ (PUNT, "error-punt") \
_ (DROP, "error-drop") \
_ (LLC, "llc-input") \
_ (IP4_INPUT, "ip4-input") \
_ (IP4_INPUT_NCS, "ip4-input-no-checksum")
/* Next-node indices for ethernet-input, generated from the list above.
   NOTE(review): the "typedef enum {" head appears to be missing from
   this excerpt — confirm against the full file. */
#define _(s,n) ETHERNET_INPUT_NEXT_##s,
  foreach_ethernet_input_next
    ETHERNET_INPUT_N_NEXT,
} ethernet_input_next_t;
  /* copy of the frame's scalar data (hw/sw if-index); only meaningful
     when ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX is set in frame_flags */
  ethernet_input_frame_t frame_data;
} ethernet_input_trace_t;
/* Trace formatter for ethernet-input (standard format_function_t
   va_list signature: vm, node, trace record).  Prints frame flags,
   the single hw/sw if-index pair when present, and the captured
   ethernet header. */
format_ethernet_input_trace (u8 * s, va_list * va)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
  u32 indent = format_get_indent (s);
  s = format (s, "frame: flags 0x%x", t->frame_flags);
  /* hw/sw if-index are valid only if the whole frame came from one interface */
  if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
    s = format (s, ", hw-if-index %u, sw-if-index %u",
		t->frame_data.hw_if_index, t->frame_data.sw_if_index);
  s = format (s, "\n%U", format_white_space, indent);
  s = format (s, "%U", format_ethernet_header, t->packet_data);
/* registered elsewhere; referenced here for error-node runtime lookup */
extern vlib_node_registration_t ethernet_input_node;

/* Which flavor of the input node is running; selects header-parsing
   behavior in parse_header()/determine_next_node(). */
  ETHERNET_INPUT_VARIANT_ETHERNET,	/* normal ethernet input */
  ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,	/* entered after LLC/SNAP parsing */
  ETHERNET_INPUT_VARIANT_NOT_L2,	/* force-L3 variant */
} ethernet_input_variant_t;
// Parse the ethernet header to extract vlan tags and innermost ethertype.
// On return: *type = innermost ethertype (host order), *outer_id/*inner_id =
// outer/inner VLAN ids, *match_flags = SUBINT_CONFIG_MATCH_*_TAG flags for
// subinterface lookup.  Advances b0 past the parsed ethernet/vlan headers
// and records l2_hdr_offset.
// NOTE(review): some parameters used below (b0, type, tag, vlan_count) are
// declared on lines missing from this excerpt — confirm full prototype.
static_always_inline void
parse_header (ethernet_input_variant_t variant,
	      u16 * outer_id, u16 * inner_id, u32 * match_flags)
  if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
      || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
      ethernet_header_t *e0;
      e0 = (void *) (b0->data + b0->current_data);
      /* remember where the L2 header starts before advancing */
      vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
      b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
      vlib_buffer_advance (b0, sizeof (e0[0]));
      *type = clib_net_to_host_u16 (e0->type);
  else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
      // here when prior node was LLC/SNAP processing
      e0 = (void *) (b0->data + b0->current_data);
      vlib_buffer_advance (b0, sizeof (e0[0]));
      *type = clib_net_to_host_u16 (e0[0]);
  // save for distinguishing between dot1q and dot1ad later
  // default the tags to 0 (used if there is no corresponding tag)
  *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
  // check for vlan encaps
  if (ethernet_frame_is_tagged (*type))
      ethernet_vlan_header_t *h0;
      *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
      h0 = (void *) (b0->data + b0->current_data);
      tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
      /* low 12 bits of the TCI are the VLAN id */
      *outer_id = tag & 0xfff;
      *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
      *type = clib_net_to_host_u16 (h0->type);
      vlib_buffer_advance (b0, sizeof (h0[0]));
      if (*type == ETHERNET_TYPE_VLAN)
	  // Double tagged packet
	  *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
	  h0 = (void *) (b0->data + b0->current_data);
	  tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
	  *inner_id = tag & 0xfff;
	  *type = clib_net_to_host_u16 (h0->type);
	  vlib_buffer_advance (b0, sizeof (h0[0]));
	  if (*type == ETHERNET_TYPE_VLAN)
	      // More than double tagged packet
	      *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
	      vlib_buffer_advance (b0, sizeof (h0[0]));
	      vlan_count = 3;	// "unknown" number, aka, 3-or-more
  /* record number of tags in buffer metadata for downstream nodes */
  ethernet_buffer_set_vlan_count (b0, vlan_count);
// Determine the subinterface for this packet, given the result of the
// vlan table lookups and vlan header parsing. Check the most specific
// match first.  Outputs: *new_sw_if_index (~0 if no match), *error0,
// *is_l2 (whether the matched subint is configured as L2).
// NOTE(review): parameters b0/match_flags used below come from lines
// missing in this excerpt — confirm full prototype.
static_always_inline void
identify_subint (vnet_hw_interface_t * hi,
		 main_intf_t * main_intf,
		 vlan_intf_t * vlan_intf,
		 qinq_intf_t * qinq_intf,
		 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
  matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
				 qinq_intf, new_sw_if_index, error0, is_l2);
  // Perform L3 my-mac filter
  // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
  // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
  ethernet_header_t *e0;
  e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
  /* multicast/broadcast dmacs bypass the my-mac check */
  if (!(ethernet_address_cast (e0->dst_address)))
    if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
      *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
  // Check for down subinterface
  *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
/* Pick the next graph node for a parsed packet: drop on error, the
   configured L2 path for L2 subints, direct dispatch for IP4/IP6/MPLS,
   the L3 redirect node when enabled, otherwise the sparse ethertype
   table (falling back to llc-input for 802.3 length fields). */
static_always_inline void
determine_next_node (ethernet_main_t * em,
		     ethernet_input_variant_t variant,
		     u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
  /* current_data now points at the payload -> that is the L3 offset */
  vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
  if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
      // some error occurred
      *next0 = ETHERNET_INPUT_NEXT_DROP;
      // record the L2 len and reset the buffer so the L2 header is preserved
      u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
      vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
      *next0 = em->l2_next;
      ASSERT (vnet_buffer (b0)->l2.l2_len ==
	      ethernet_buffer_header_size (b0));
      /* rewind so L2 nodes see the full L2 header */
      vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
  // check for common IP/MPLS ethertypes
  else if (type0 == ETHERNET_TYPE_IP4)
      *next0 = em->l3_next.input_next_ip4;
  else if (type0 == ETHERNET_TYPE_IP6)
      *next0 = em->l3_next.input_next_ip6;
  else if (type0 == ETHERNET_TYPE_MPLS)
      *next0 = em->l3_next.input_next_mpls;
  else if (em->redirect_l3)
      // L3 Redirect is on, the cached common next nodes will be
      // pointing to the redirect node, catch the uncommon types here
      *next0 = em->redirect_l3_next;
      // uncommon ethertype, check table
      i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
      *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
	SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
      // The table is not populated with LLC values, so check that now.
      // If variant is variant_ethernet then we came from LLC processing. Don't
      // go back there; drop instead using by keeping the drop/bad table result.
      if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
	  *next0 = ETHERNET_INPUT_NEXT_LLC;
/* following vector code relies on following assumptions */
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
/* the SIMD path below writes l2_hdr_offset and l3_hdr_offset with a
   single 32-bit store, so they must be adjacent i16 fields */
STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
	       STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
	       "l3_hdr_offset must follow l2_hdr_offset");
/* Advance 4 buffers past the ethernet header (L3 mode) or leave them
   at L2 (is_l3 == 0), recording l2/l3 header offsets and setting the
   OFFSET_VALID flags.  AVX2 path updates current_data/current_length/
   flags of all 4 buffers with wide loads/stores; scalar path does the
   same field by field. */
static_always_inline void
eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
  i16 adv = sizeof (ethernet_header_t);
  u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
    VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
#ifdef CLIB_HAVE_VEC256
  /* to reduce number of small loads/stores we are loading first 64 bits
     of each buffer metadata into 256-bit register so we can advance
     current_data, current_length and flags.
     Observed saving of this code is ~2 clocks per packet */
  /* vector of signed 16 bit integers used in signed vector add operation
     to advance current_data and current_length */
  u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
  adv, -adv, 0, 0, adv, -adv, 0, 0,
    adv, -adv, 0, 0, adv, -adv, 0, 0
  /* load 4 x 64 bits */
  r = u64x4_gather (b[0], b[1], b[2], b[3]);
  /* +adv to current_data, -adv to current_length */
  radv = (u64x4) ((i16x16) r + adv4);
  /* write 4 x 64 bits */
  u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
  /* use old current_data as l2_hdr_offset and new current_data as
     l3_hdr_offset */
  r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
  /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
  u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
  /* sanity: l3 mode advanced current_data by exactly one ether header */
  ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
  ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
  ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
  ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
  ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
  ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
  ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
  ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
  /* l2 mode: current_data stays at the L2 header */
  ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
  ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
  ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
  ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
  ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
  ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
  ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
  ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
  /* scalar fallback: record offsets field by field */
  vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
  vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
  vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
  vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
  vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
  vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
  vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
  vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
  vlib_buffer_advance (b[0], adv);
  vlib_buffer_advance (b[1], adv);
  vlib_buffer_advance (b[2], adv);
  vlib_buffer_advance (b[3], adv);
  b[0]->flags |= flags;
  b[1]->flags |= flags;
  b[2]->flags |= flags;
  b[3]->flags |= flags;
  /* untagged assumption: l2_len is just the ether header */
  vnet_buffer (b[0])->l2.l2_len = adv;
  vnet_buffer (b[1])->l2.l2_len = adv;
  vnet_buffer (b[2])->l2.l2_len = adv;
  vnet_buffer (b[3])->l2.l2_len = adv;
/* Single-buffer variant of eth_input_adv_and_flags_x4: record l2/l3
   header offsets, set the OFFSET_VALID flags, and (in L3 mode) advance
   past the ethernet header. */
static_always_inline void
eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
  i16 adv = sizeof (ethernet_header_t);
  u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
    VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
  vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
  vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
  vlib_buffer_advance (b[0], adv);
  b[0]->flags |= flags;
  /* untagged assumption: l2_len is just the ether header */
  vnet_buffer (b[0])->l2.l2_len = adv;
/* Extract per-packet parse inputs for buffer b[offset]: the ethertype
   (network byte order) into etype[offset], the 8 bytes following the
   ethertype (potential VLAN tags) into tags[offset], and optionally the
   first 8 bytes (destination MAC + 2) into dmacs[offset].
   NOTE(review): the #else/#endif around the VEC128 path appear to be
   missing from this excerpt — confirm against the full file. */
static_always_inline void
eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
			      u64 * dmacs, int offset, int dmac_check)
  ethernet_header_t *e;
  e = vlib_buffer_get_current (b[offset]);
#ifdef CLIB_HAVE_VEC128
  /* one 16-byte load covers dmac..ethertype; lane 3 is the ethertype */
  u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
  etype[offset] = ((u16x8) r)[3];
  etype[offset] = e->type;
  tags[offset] = *(u64 *) (e + 1);
  dmacs[offset] = *(u64 *) e;
451 static_always_inline u16
452 eth_input_next_by_type (u16 etype)
454 ethernet_main_t *em = ðernet_main;
456 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
457 vec_elt (em->l3_next.input_next_by_type,
458 sparse_vec_index (em->l3_next.input_next_by_type, etype));
  /* per-lookup-cache counters, flushed into the sw_if_index combined
     RX counter by eth_input_update_if_counters() */
  u64 n_packets, n_bytes;
} eth_input_tag_lookup_t;
/* Flush the cached tag-lookup counters into the combined RX counter of
   the cached sw_if_index; no-op when nothing was counted or the cached
   sw_if_index is invalid.
   NOTE(review): a "return;" for the guard appears to be missing from
   this excerpt — confirm against the full file. */
static_always_inline void
eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
			      eth_input_tag_lookup_t * l)
  if (l->n_packets == 0 || l->sw_if_index == ~0)
  /* l->len is the per-packet L2 header length for this lookup */
  l->n_bytes += l->n_packets * l->len;
  vlib_increment_combined_counter
    (vnm->interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
     l->n_packets, l->n_bytes);
/* Slow-path handler for one tagged packet.  Caches the last seen tag
   pair in *l; a full vlan/qinq subinterface lookup runs only when the
   incoming tag (masked to the significant tag count) differs from the
   cached one.  Fills next[0], rewrites b's metadata/counters.
   dmac_bad selects the my-mac-mismatch error path when check_dmac is
   set and the matched subinterface is L3.
   NOTE(review): "ðernet_main" below looks like a mojibake of
   "&ethernet_main" — fix encoding; several brace/else lines of the
   lookup structure are missing from this excerpt. */
static_always_inline void
eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
		      vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
		      u64 tag, u16 * next, vlib_buffer_t * b,
		      eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
		      int main_is_l3, int check_dmac)
  ethernet_main_t *em = ðernet_main;
  /* cache miss: tag differs from last lookup in the significant bits */
  if ((tag ^ l->tag) & l->mask)
      main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
      vlan_table_t *vlan_table;
      qinq_table_t *qinq_table;
      /* view the raw 8 tag bytes as 4 network-order u16s:
         t[0]=outer TCI, t[1]=inner TPID/etype, t[2]=inner TCI, t[3]=etype */
      u16 *t = (u16 *) & tag;
      u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
      u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
      u32 matched, is_l2, new_sw_if_index;
      vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
				     mif->dot1ad_vlans : mif->dot1q_vlans);
      vif = &vlan_table->vlans[vlan1];
      qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
      qif = &qinq_table->vlans[vlan2];
      l->err = ETHERNET_ERROR_NONE;
      l->type = clib_net_to_host_u16 (t[1]);
      /* double tagged: real ethertype is after the second tag */
      if (l->type == ETHERNET_TYPE_VLAN)
	  l->type = clib_net_to_host_u16 (t[3]);
	  matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
					 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
					 qif, &new_sw_if_index, &l->err,
	      new_sw_if_index = hi->sw_if_index;
	      l->err = ETHERNET_ERROR_NONE;
	      is_l2 = main_is_l3 == 0;
	  matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
					 SUBINT_CONFIG_MATCH_1_TAG, mif,
					 vif, qif, &new_sw_if_index,
      /* interface changed: flush counters cached for the old one */
      if (l->sw_if_index != new_sw_if_index)
	  eth_input_update_if_counters (vm, vnm, l);
	  l->sw_if_index = new_sw_if_index;
      /* 2-tag match compares all 8 tag bytes, 1-tag only the first 4 */
      l->mask = (l->n_tags == 2) ?
	clib_net_to_host_u64 (0xffffffffffffffff) :
	clib_net_to_host_u64 (0xffffffff00000000);
      if (matched && l->sw_if_index == ~0)
	l->err = ETHERNET_ERROR_DOWN;
      l->len = sizeof (ethernet_header_t) +
	l->n_tags * sizeof (ethernet_vlan_header_t);
      l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
	l->n_tags * sizeof (ethernet_vlan_header_t);
      l->adv = is_l2 ? 0 : l->len;
      if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
	l->next = ETHERNET_INPUT_NEXT_DROP;
	l->next = em->l2_next;
      else if (l->type == ETHERNET_TYPE_IP4)
	l->next = em->l3_next.input_next_ip4;
      else if (l->type == ETHERNET_TYPE_IP6)
	l->next = em->l3_next.input_next_ip6;
      else if (l->type == ETHERNET_TYPE_MPLS)
	l->next = em->l3_next.input_next_mpls;
      else if (em->redirect_l3)
	l->next = em->redirect_l3_next;
	  l->next = eth_input_next_by_type (l->type);
	  if (l->next == ETHERNET_INPUT_NEXT_PUNT)
	    l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
  /* l->adv > 0 means the subint is L3, so the my-mac filter applies */
  if (check_dmac && l->adv > 0 && dmac_bad)
      l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
      next[0] = ETHERNET_INPUT_NEXT_PUNT;
  vlib_buffer_advance (b, l->adv);
  vnet_buffer (b)->l2.l2_len = l->len;
  vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
  if (l->err == ETHERNET_ERROR_NONE)
      vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
      ethernet_buffer_set_vlan_count (b, l->n_tags);
    b->error = node->errors[l->err];
  /* update counters */
  l->n_bytes += vlib_buffer_length_in_chain (vm, b);
/* process frame of buffers, store ethertype into array and update
   buffer metadata fields depending on interface being l2 or l3 assuming that
   packets are untagged. For tagged packets those fields are updated later.
   Optionally store Destination MAC address and tag data into arrays
   for further processing */
616 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
617 "VLIB_FRAME_SIZE must be power of 8");
/* Process one frame of buffers from a single interface: batch-extract
   ethertype/tags/dmac, advance metadata (x4/x1 helpers), optionally
   batch-check destination MACs against the interface address, dispatch
   well-known ethertypes on the fastpath and collect the rest
   (vlan/dot1ad/unknown) into slowpath_indices for per-packet handling,
   then enqueue everything.
   NOTE(review): "ðernet_main" below looks like a mojibake of
   "&ethernet_main"; several loop headers/braces are missing from this
   excerpt. */
static_always_inline void
eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
			 vnet_hw_interface_t * hi,
			 u32 * buffer_indices, u32 n_packets, int main_is_l3,
			 int ip4_cksum_ok, int dmac_check)
  ethernet_main_t *em = ðernet_main;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
  u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
  u8 dmacs_bad[VLIB_FRAME_SIZE];
  u64 tags[VLIB_FRAME_SIZE], *tag = tags;
  u16 slowpath_indices[VLIB_FRAME_SIZE];
  u16 next_ip4, next_ip6, next_mpls, next_l2;
  /* well-known ethertypes pre-swapped to network byte order, since
     etypes[] is stored unswapped */
  u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
  u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
  u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
  u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
  u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
  i32 n_left = n_packets;
  /* b[0..3] current, b[8..11] data-prefetch, b[16..19] header-prefetch */
  vlib_buffer_t *b[20];
  from = buffer_indices;
  /* main prefetching loop: parse 4 while prefetching 2 batches ahead */
  vlib_buffer_t **ph = b + 16, **pd = b + 8;
  vlib_get_buffers (vm, from, b, 4);
  vlib_get_buffers (vm, from + 8, pd, 4);
  vlib_get_buffers (vm, from + 16, ph, 4);
  vlib_prefetch_buffer_header (ph[0], LOAD);
  vlib_prefetch_buffer_data (pd[0], LOAD);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
  vlib_prefetch_buffer_header (ph[1], LOAD);
  vlib_prefetch_buffer_data (pd[1], LOAD);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
  vlib_prefetch_buffer_header (ph[2], LOAD);
  vlib_prefetch_buffer_data (pd[2], LOAD);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
  vlib_prefetch_buffer_header (ph[3], LOAD);
  vlib_prefetch_buffer_data (pd[3], LOAD);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
  eth_input_adv_and_flags_x4 (b, main_is_l3);
  /* tail x4 loop without prefetch */
  vlib_get_buffers (vm, from, b, 4);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
  eth_input_adv_and_flags_x4 (b, main_is_l3);
  /* remainder, one at a time */
  vlib_get_buffers (vm, from, b, 1);
  eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
  eth_input_adv_and_flags_x1 (b, main_is_l3);
  /* dmac check: mark unicast packets whose dmac (masked to 48 bits,
     network order) differs from the interface address; igbit is the
     multicast/broadcast I/G bit */
  u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
  u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
  u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
  u8 *dmac_bad = dmacs_bad;
#ifdef CLIB_HAVE_VEC256
  u64x4 igbit4 = u64x4_splat (igbit);
  u64x4 mask4 = u64x4_splat (mask);
  u64x4 hwaddr4 = u64x4_splat (hwaddr);
  r0 = u64x4_load_unaligned (dmac + 0) & mask4;
  r1 = u64x4_load_unaligned (dmac + 4) & mask4;
  r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
  r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
  *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
  *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
  /* scalar dmac check fallback */
  r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
  r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
  r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
  r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
  next_ip4 = em->l3_next.input_next_ip4;
  next_ip6 = em->l3_next.input_next_ip6;
  next_mpls = em->l3_next.input_next_mpls;
  next_l2 = em->l2_next;
  /* skip the checksum node when the driver already validated ip4 csums */
  if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
    next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
#ifdef CLIB_HAVE_VEC256
  u16x16 et16_ip4 = u16x16_splat (et_ip4);
  u16x16 et16_ip6 = u16x16_splat (et_ip6);
  u16x16 et16_mpls = u16x16_splat (et_mpls);
  u16x16 et16_vlan = u16x16_splat (et_vlan);
  u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
  u16x16 next16_ip4 = u16x16_splat (next_ip4);
  u16x16 next16_ip6 = u16x16_splat (next_ip6);
  u16x16 next16_mpls = u16x16_splat (next_mpls);
  u16x16 next16_l2 = u16x16_splat (next_l2);
  u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
  /* fastpath - in l3 mode handles ip4, ip6 and mpls packets, other packets
     are considered as slowpath, in l2 mode all untagged packets are
     considered as fastpath */
#ifdef CLIB_HAVE_VEC256
  u16x16 e16 = u16x16_load_unaligned (etype);
  r += (e16 == et16_ip4) & next16_ip4;
  r += (e16 == et16_ip6) & next16_ip6;
  r += (e16 == et16_mpls) & next16_mpls;
  r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
  u16x16_store_unaligned (r, next);
  /* any zero next-index means that lane needs the slowpath */
  if (!u16x16_is_all_zero (r == zero))
      if (u16x16_is_all_zero (r))
	  /* all 16 slow: append i..i+15 in one vector store */
	  u16x16_store_unaligned (u16x16_splat (i) + stairs,
				  slowpath_indices + n_slowpath);
	  for (int j = 0; j < 16; j++)
	      slowpath_indices[n_slowpath++] = i + j;
  /* scalar fastpath classification */
  if (main_is_l3 && etype[0] == et_ip4)
  else if (main_is_l3 && etype[0] == et_ip6)
  else if (main_is_l3 && etype[0] == et_mpls)
  else if (main_is_l3 == 0 &&
	   etype[0] != et_vlan && etype[0] != et_dot1ad)
    slowpath_indices[n_slowpath++] = i;
  /* slowpath: tagged packets and unknown ethertypes */
  vnet_main_t *vnm = vnet_get_main ();
  u16 *si = slowpath_indices;
  u32 last_unknown_etype = ~0;
  u32 last_unknown_next = ~0;
  /* seed the tag cache with a value guaranteed not to match packet 0 */
  eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
    .tag = tags[si[0]] ^ -1LL,
  clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
  u16 etype = etypes[i];
  if (etype == et_vlan)
      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
      eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
			    &dot1q_lookup, dmacs_bad[i], 0,
			    main_is_l3, dmac_check);
  else if (etype == et_dot1ad)
      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
      eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
			    &dot1ad_lookup, dmacs_bad[i], 1,
			    main_is_l3, dmac_check);
      /* untagged packet with not well known ethertype */
      if (last_unknown_etype != etype)
	  last_unknown_etype = etype;
	  etype = clib_host_to_net_u16 (etype);
	  last_unknown_next = eth_input_next_by_type (etype);
      if (dmac_check && main_is_l3 && dmacs_bad[i])
	  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
	  b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
	  nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
	nexts[i] = last_unknown_next;
  /* flush any counters still cached in the two tag-lookup states */
  eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
  eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
  vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
/* Entry for frames known to come from a single interface: decide the
   L2/L3 mode and whether a per-packet DMAC check is needed (L3 with
   promisc or subinterfaces), then run eth_input_process_frame.
   NOTE(review): "ðernet_main" below looks like a mojibake of
   "&ethernet_main"; the trailing parameter(s) and branch structure are
   partially missing from this excerpt. */
static_always_inline void
eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
		      vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
  ethernet_main_t *em = ðernet_main;
  ethernet_interface_t *ei;
  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
  main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
  subint_config_t *subint0 = &intf0->untagged_subint;
  int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
  int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
  /* main interface is L3, we don't expect tagged packets and interface
     is not in promisc mode, so we don't need to check DMAC */
  eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
  /* subinterfaces and promisc mode so DMAC check is needed */
  eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
  /* untagged packets are treated as L2 */
  eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
/* Per-frame tracing helper: when node tracing is enabled, record frame
   flags/scalar data and the start of each traced packet; when rx pcap
   capture is enabled, append matching buffers to the pcap file.
   NOTE(review): loop headers/decrements are missing from this excerpt. */
static_always_inline void
ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
		      vlib_frame_t * from_frame)
  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
      from = vlib_frame_vector_args (from_frame);
      n_left = from_frame->n_vectors;
      ethernet_input_trace_t *t0;
      vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
      if (b0->flags & VLIB_BUFFER_IS_TRACED)
	  t0 = vlib_add_trace (vm, node, b0,
			       sizeof (ethernet_input_trace_t));
	  /* snapshot the packet start for the trace formatter */
	  clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
			    sizeof (t0->packet_data));
	  t0->frame_flags = from_frame->flags;
	  clib_memcpy_fast (&t0->frame_data,
			    vlib_frame_scalar_args (from_frame),
			    sizeof (ethernet_input_frame_t));
  /* rx pcap capture if enabled */
  if (PREDICT_FALSE (vm->pcap[VLIB_RX].pcap_enable))
      from = vlib_frame_vector_args (from_frame);
      n_left = from_frame->n_vectors;
      b0 = vlib_get_buffer (vm, bi0);
      /* sw_if_index 0 means capture on all interfaces */
      if (vm->pcap[VLIB_RX].pcap_sw_if_index == 0 ||
	  vm->pcap[VLIB_RX].pcap_sw_if_index
	  == vnet_buffer (b0)->sw_if_index[VLIB_RX])
	  /* capture at most 512 bytes of each packet */
	  pcap_add_buffer (&vm->pcap[VLIB_RX].pcap_main, vm, bi0, 512);
1017 static_always_inline void
1018 ethernet_input_inline (vlib_main_t * vm,
1019 vlib_node_runtime_t * node,
1020 u32 * from, u32 n_packets,
1021 ethernet_input_variant_t variant)
1023 vnet_main_t *vnm = vnet_get_main ();
1024 ethernet_main_t *em = ðernet_main;
1025 vlib_node_runtime_t *error_node;
1026 u32 n_left_from, next_index, *to_next;
1027 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1028 u32 thread_index = vm->thread_index;
1029 u32 cached_sw_if_index = ~0;
1030 u32 cached_is_l2 = 0; /* shut up gcc */
1031 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1032 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
1033 vlib_buffer_t **b = bufs;
1035 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1036 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1040 n_left_from = n_packets;
1042 next_index = node->cached_next_index;
1043 stats_sw_if_index = node->runtime_data[0];
1044 stats_n_packets = stats_n_bytes = 0;
1045 vlib_get_buffers (vm, from, bufs, n_left_from);
1047 while (n_left_from > 0)
1051 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1053 while (n_left_from >= 4 && n_left_to_next >= 2)
1056 vlib_buffer_t *b0, *b1;
1057 u8 next0, next1, error0, error1;
1058 u16 type0, orig_type0, type1, orig_type1;
1059 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1060 u32 match_flags0, match_flags1;
1061 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1062 new_sw_if_index1, len1;
1063 vnet_hw_interface_t *hi0, *hi1;
1064 main_intf_t *main_intf0, *main_intf1;
1065 vlan_intf_t *vlan_intf0, *vlan_intf1;
1066 qinq_intf_t *qinq_intf0, *qinq_intf1;
1068 ethernet_header_t *e0, *e1;
1070 /* Prefetch next iteration. */
1072 vlib_prefetch_buffer_header (b[2], STORE);
1073 vlib_prefetch_buffer_header (b[3], STORE);
1075 CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
1076 CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
1085 n_left_to_next -= 2;
1092 error0 = error1 = ETHERNET_ERROR_NONE;
1093 e0 = vlib_buffer_get_current (b0);
1094 type0 = clib_net_to_host_u16 (e0->type);
1095 e1 = vlib_buffer_get_current (b1);
1096 type1 = clib_net_to_host_u16 (e1->type);
1098 /* Set the L2 header offset for all packets */
1099 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1100 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1101 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1102 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1104 /* Speed-path for the untagged case */
1105 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1106 && !ethernet_frame_is_any_tagged_x2 (type0,
1110 subint_config_t *subint0;
1111 u32 sw_if_index0, sw_if_index1;
1113 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1114 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1115 is_l20 = cached_is_l2;
1117 /* This is probably wholly unnecessary */
1118 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1121 /* Now sw_if_index0 == sw_if_index1 */
1122 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1124 cached_sw_if_index = sw_if_index0;
1125 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1126 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1127 subint0 = &intf0->untagged_subint;
1128 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1131 if (PREDICT_TRUE (is_l20 != 0))
1133 vnet_buffer (b0)->l3_hdr_offset =
1134 vnet_buffer (b0)->l2_hdr_offset +
1135 sizeof (ethernet_header_t);
1136 vnet_buffer (b1)->l3_hdr_offset =
1137 vnet_buffer (b1)->l2_hdr_offset +
1138 sizeof (ethernet_header_t);
1139 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1140 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1141 next0 = em->l2_next;
1142 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1143 next1 = em->l2_next;
1144 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1148 if (!ethernet_address_cast (e0->dst_address) &&
1149 (hi->hw_address != 0) &&
1150 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1151 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1152 if (!ethernet_address_cast (e1->dst_address) &&
1153 (hi->hw_address != 0) &&
1154 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1155 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1156 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1157 determine_next_node (em, variant, 0, type0, b0,
1159 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1160 determine_next_node (em, variant, 0, type1, b1,
1166 /* Slow-path for the tagged case */
1168 parse_header (variant,
1171 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1173 parse_header (variant,
1176 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1178 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1179 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1181 eth_vlan_table_lookups (em,
1188 &main_intf0, &vlan_intf0, &qinq_intf0);
1190 eth_vlan_table_lookups (em,
1197 &main_intf1, &vlan_intf1, &qinq_intf1);
1199 identify_subint (hi0,
1204 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1206 identify_subint (hi1,
1211 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1213 // Save RX sw_if_index for later nodes
1214 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1216 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1217 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1219 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1221 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1222 if (((new_sw_if_index0 != ~0)
1223 && (new_sw_if_index0 != old_sw_if_index0))
1224 || ((new_sw_if_index1 != ~0)
1225 && (new_sw_if_index1 != old_sw_if_index1)))
1228 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1229 - vnet_buffer (b0)->l2_hdr_offset;
1230 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1231 - vnet_buffer (b1)->l2_hdr_offset;
1233 stats_n_packets += 2;
1234 stats_n_bytes += len0 + len1;
1237 (!(new_sw_if_index0 == stats_sw_if_index
1238 && new_sw_if_index1 == stats_sw_if_index)))
1240 stats_n_packets -= 2;
1241 stats_n_bytes -= len0 + len1;
1243 if (new_sw_if_index0 != old_sw_if_index0
1244 && new_sw_if_index0 != ~0)
1245 vlib_increment_combined_counter (vnm->
1246 interface_main.combined_sw_if_counters
1248 VNET_INTERFACE_COUNTER_RX,
1250 new_sw_if_index0, 1,
1252 if (new_sw_if_index1 != old_sw_if_index1
1253 && new_sw_if_index1 != ~0)
1254 vlib_increment_combined_counter (vnm->
1255 interface_main.combined_sw_if_counters
1257 VNET_INTERFACE_COUNTER_RX,
1259 new_sw_if_index1, 1,
1262 if (new_sw_if_index0 == new_sw_if_index1)
1264 if (stats_n_packets > 0)
1266 vlib_increment_combined_counter
1267 (vnm->interface_main.combined_sw_if_counters
1268 + VNET_INTERFACE_COUNTER_RX,
1271 stats_n_packets, stats_n_bytes);
1272 stats_n_packets = stats_n_bytes = 0;
1274 stats_sw_if_index = new_sw_if_index0;
1279 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1280 is_l20 = is_l21 = 0;
1282 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1284 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1288 b0->error = error_node->errors[error0];
1289 b1->error = error_node->errors[error1];
1291 // verify speculative enqueue
1292 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1293 n_left_to_next, bi0, bi1, next0,
1297 while (n_left_from > 0 && n_left_to_next > 0)
1302 u16 type0, orig_type0;
1303 u16 outer_id0, inner_id0;
1305 u32 old_sw_if_index0, new_sw_if_index0, len0;
1306 vnet_hw_interface_t *hi0;
1307 main_intf_t *main_intf0;
1308 vlan_intf_t *vlan_intf0;
1309 qinq_intf_t *qinq_intf0;
1310 ethernet_header_t *e0;
1313 // Prefetch next iteration
1314 if (n_left_from > 1)
1316 vlib_prefetch_buffer_header (b[1], STORE);
1317 CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1325 n_left_to_next -= 1;
1330 error0 = ETHERNET_ERROR_NONE;
1331 e0 = vlib_buffer_get_current (b0);
1332 type0 = clib_net_to_host_u16 (e0->type);
1334 /* Set the L2 header offset for all packets */
1335 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1336 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1338 /* Speed-path for the untagged case */
1339 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1340 && !ethernet_frame_is_tagged (type0)))
1343 subint_config_t *subint0;
1346 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1347 is_l20 = cached_is_l2;
1349 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1351 cached_sw_if_index = sw_if_index0;
1352 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1353 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1354 subint0 = &intf0->untagged_subint;
1355 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1359 if (PREDICT_TRUE (is_l20 != 0))
1361 vnet_buffer (b0)->l3_hdr_offset =
1362 vnet_buffer (b0)->l2_hdr_offset +
1363 sizeof (ethernet_header_t);
1364 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1365 next0 = em->l2_next;
1366 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1370 if (!ethernet_address_cast (e0->dst_address) &&
1371 (hi->hw_address != 0) &&
1372 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1373 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1374 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1375 determine_next_node (em, variant, 0, type0, b0,
1381 /* Slow-path for the tagged case */
1382 parse_header (variant,
1385 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1387 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1389 eth_vlan_table_lookups (em,
1396 &main_intf0, &vlan_intf0, &qinq_intf0);
1398 identify_subint (hi0,
1403 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1405 // Save RX sw_if_index for later nodes
1406 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1408 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1410 // Increment subinterface stats
1411 // Note that interface-level counters have already been incremented
1412 // prior to calling this function. Thus only subinterface counters
1413 // are incremented here.
1415 // Interface level counters include packets received on the main
1416 // interface and all subinterfaces. Subinterface level counters
1417 // include only those packets received on that subinterface
1418 // Increment stats if the subint is valid and it is not the main intf
1419 if ((new_sw_if_index0 != ~0)
1420 && (new_sw_if_index0 != old_sw_if_index0))
1423 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1424 - vnet_buffer (b0)->l2_hdr_offset;
1426 stats_n_packets += 1;
1427 stats_n_bytes += len0;
1429 // Batch stat increments from the same subinterface so counters
1430 // don't need to be incremented for every packet.
1431 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1433 stats_n_packets -= 1;
1434 stats_n_bytes -= len0;
1436 if (new_sw_if_index0 != ~0)
1437 vlib_increment_combined_counter
1438 (vnm->interface_main.combined_sw_if_counters
1439 + VNET_INTERFACE_COUNTER_RX,
1440 thread_index, new_sw_if_index0, 1, len0);
1441 if (stats_n_packets > 0)
1443 vlib_increment_combined_counter
1444 (vnm->interface_main.combined_sw_if_counters
1445 + VNET_INTERFACE_COUNTER_RX,
1447 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1448 stats_n_packets = stats_n_bytes = 0;
1450 stats_sw_if_index = new_sw_if_index0;
1454 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1457 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1461 b0->error = error_node->errors[error0];
1463 // verify speculative enqueue
1464 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1465 to_next, n_left_to_next,
1469 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1472 // Increment any remaining batched stats
1473 if (stats_n_packets > 0)
1475 vlib_increment_combined_counter
1476 (vnm->interface_main.combined_sw_if_counters
1477 + VNET_INTERFACE_COUNTER_RX,
1478 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1479 node->runtime_data[0] = stats_sw_if_index;
1483 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1484 vlib_node_runtime_t * node,
1485 vlib_frame_t * frame)
1487 vnet_main_t *vnm = vnet_get_main ();
1488 u32 *from = vlib_frame_vector_args (frame);
1489 u32 n_packets = frame->n_vectors;
1491 ethernet_input_trace (vm, node, frame);
1493 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
1495 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1496 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1497 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1498 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
1501 ethernet_input_inline (vm, node, from, n_packets,
1502 ETHERNET_INPUT_VARIANT_ETHERNET);
1506 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1507 vlib_node_runtime_t * node,
1508 vlib_frame_t * from_frame)
1510 u32 *from = vlib_frame_vector_args (from_frame);
1511 u32 n_packets = from_frame->n_vectors;
1512 ethernet_input_trace (vm, node, from_frame);
1513 ethernet_input_inline (vm, node, from, n_packets,
1514 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
1518 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1519 vlib_node_runtime_t * node,
1520 vlib_frame_t * from_frame)
1522 u32 *from = vlib_frame_vector_args (from_frame);
1523 u32 n_packets = from_frame->n_vectors;
1524 ethernet_input_trace (vm, node, from_frame);
1525 ethernet_input_inline (vm, node, from, n_packets,
1526 ETHERNET_INPUT_VARIANT_NOT_L2);
1531 // Return the subinterface config struct for the given sw_if_index
1532 // Also return via parameter the appropriate match flags for the
1533 // configured number of tags.
1534 // On error (unsupported or not ethernet) return 0.
1535 static subint_config_t *
1536 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1538 u32 * flags, u32 * unsupported)
1540 ethernet_main_t *em = ðernet_main;
1541 vnet_hw_interface_t *hi;
1542 vnet_sw_interface_t *si;
1543 main_intf_t *main_intf;
1544 vlan_table_t *vlan_table;
1545 qinq_table_t *qinq_table;
1546 subint_config_t *subint = 0;
1548 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1550 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1553 goto done; // non-ethernet interface
1556 // ensure there's an entry for the main intf (shouldn't really be necessary)
1557 vec_validate (em->main_intfs, hi->hw_if_index);
1558 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1560 // Locate the subint for the given ethernet config
1561 si = vnet_get_sw_interface (vnm, sw_if_index);
1563 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1565 p2p_ethernet_main_t *p2pm = &p2p_main;
1566 u32 p2pe_sw_if_index =
1567 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1568 if (p2pe_sw_if_index == ~0)
1570 pool_get (p2pm->p2p_subif_pool, subint);
1571 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1574 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1575 *flags = SUBINT_CONFIG_P2P;
1577 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1581 pipe = pipe_get (sw_if_index);
1582 subint = &pipe->subint;
1583 *flags = SUBINT_CONFIG_P2P;
1585 else if (si->sub.eth.flags.default_sub)
1587 subint = &main_intf->default_subint;
1588 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1589 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1591 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1593 // if no flags are set then this is a main interface
1594 // so treat as untagged
1595 subint = &main_intf->untagged_subint;
1596 *flags = SUBINT_CONFIG_MATCH_0_TAG;
1601 // first get the vlan table
1602 if (si->sub.eth.flags.dot1ad)
1604 if (main_intf->dot1ad_vlans == 0)
1606 // Allocate a vlan table from the pool
1607 pool_get (em->vlan_pool, vlan_table);
1608 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1612 // Get ptr to existing vlan table
1614 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1619 if (main_intf->dot1q_vlans == 0)
1621 // Allocate a vlan table from the pool
1622 pool_get (em->vlan_pool, vlan_table);
1623 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1627 // Get ptr to existing vlan table
1629 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
1633 if (si->sub.eth.flags.one_tag)
1635 *flags = si->sub.eth.flags.exact_match ?
1636 SUBINT_CONFIG_MATCH_1_TAG :
1637 (SUBINT_CONFIG_MATCH_1_TAG |
1638 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1640 if (si->sub.eth.flags.outer_vlan_id_any)
1642 // not implemented yet
1648 // a single vlan, a common case
1650 &vlan_table->vlans[si->sub.eth.
1651 outer_vlan_id].single_tag_subint;
1658 *flags = si->sub.eth.flags.exact_match ?
1659 SUBINT_CONFIG_MATCH_2_TAG :
1660 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1662 if (si->sub.eth.flags.outer_vlan_id_any
1663 && si->sub.eth.flags.inner_vlan_id_any)
1665 // not implemented yet
1670 if (si->sub.eth.flags.inner_vlan_id_any)
1672 // a specific outer and "any" inner
1673 // don't need a qinq table for this
1675 &vlan_table->vlans[si->sub.eth.
1676 outer_vlan_id].inner_any_subint;
1677 if (si->sub.eth.flags.exact_match)
1679 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1683 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1684 SUBINT_CONFIG_MATCH_3_TAG;
1689 // a specific outer + specifc innner vlan id, a common case
1691 // get the qinq table
1692 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1694 // Allocate a qinq table from the pool
1695 pool_get (em->qinq_pool, qinq_table);
1696 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1697 qinq_table - em->qinq_pool;
1701 // Get ptr to existing qinq table
1703 vec_elt_at_index (em->qinq_pool,
1704 vlan_table->vlans[si->sub.
1708 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
1717 static clib_error_t *
1718 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1720 subint_config_t *subint;
1723 clib_error_t *error = 0;
1725 // Find the config for this subinterface
1727 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1732 // not implemented yet or not ethernet
1736 subint->sw_if_index =
1737 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1743 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1746 #ifndef CLIB_MARCH_VARIANT
1747 // Set the L2/L3 mode for the subinterface
1749 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1751 subint_config_t *subint;
1755 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
1757 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1759 // Find the config for this subinterface
1761 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1766 // unimplemented or not ethernet
1770 // Double check that the config we found is for our interface (or the interface is down)
1771 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1775 subint->flags |= SUBINT_CONFIG_L2;
1778 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1779 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1783 subint->flags &= ~SUBINT_CONFIG_L2;
1786 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1787 | SUBINT_CONFIG_MATCH_3_TAG);
1795 * Set the L2/L3 mode for the subinterface regardless of port
1798 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1799 u32 sw_if_index, u32 l2)
1801 subint_config_t *subint;
1805 /* Find the config for this subinterface */
1807 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1812 /* unimplemented or not ethernet */
1817 * Double check that the config we found is for our interface (or the
1818 * interface is down)
1820 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1824 subint->flags |= SUBINT_CONFIG_L2;
1828 subint->flags &= ~SUBINT_CONFIG_L2;
1836 static clib_error_t *
1837 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1838 u32 sw_if_index, u32 is_create)
1840 clib_error_t *error = 0;
1841 subint_config_t *subint;
1843 u32 unsupported = 0;
1845 // Find the config for this subinterface
1847 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1852 // not implemented yet or not ethernet
1855 // this is the NYI case
1856 error = clib_error_return (0, "not implemented yet");
1867 // Initialize the subint
1868 if (subint->flags & SUBINT_CONFIG_VALID)
1870 // Error vlan already in use
1871 error = clib_error_return (0, "vlan is already in use");
1875 // Note that config is L3 by default
1876 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1877 subint->sw_if_index = ~0; // because interfaces are initially down
1884 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
1886 static char *ethernet_error_strings[] = {
1887 #define ethernet_error(n,c,s) s,
1888 #include "error.def"
1889 #undef ethernet_error
1893 VLIB_REGISTER_NODE (ethernet_input_node) = {
1894 .name = "ethernet-input",
1895 /* Takes a vector of packets. */
1896 .vector_size = sizeof (u32),
1897 .scalar_size = sizeof (ethernet_input_frame_t),
1898 .n_errors = ETHERNET_N_ERROR,
1899 .error_strings = ethernet_error_strings,
1900 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1902 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1903 foreach_ethernet_input_next
1906 .format_buffer = format_ethernet_header_with_length,
1907 .format_trace = format_ethernet_input_trace,
1908 .unformat_buffer = unformat_ethernet_header,
1911 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1912 .name = "ethernet-input-type",
1913 /* Takes a vector of packets. */
1914 .vector_size = sizeof (u32),
1915 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1917 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1918 foreach_ethernet_input_next
1923 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1924 .name = "ethernet-input-not-l2",
1925 /* Takes a vector of packets. */
1926 .vector_size = sizeof (u32),
1927 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1929 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1930 foreach_ethernet_input_next
1936 #ifndef CLIB_MARCH_VARIANT
1938 ethernet_set_rx_redirect (vnet_main_t * vnm,
1939 vnet_hw_interface_t * hi, u32 enable)
1941 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1942 // don't go directly to ip4-input)
1943 vnet_hw_interface_rx_redirect_to_node
1944 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1949 * Initialization and registration for the next_by_ethernet structure
1953 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1955 l3_next->input_next_by_type = sparse_vec_new
1956 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1957 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1959 vec_validate (l3_next->sparse_index_by_input_next_index,
1960 ETHERNET_INPUT_NEXT_DROP);
1961 vec_validate (l3_next->sparse_index_by_input_next_index,
1962 ETHERNET_INPUT_NEXT_PUNT);
1963 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1964 SPARSE_VEC_INVALID_INDEX;
1965 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1966 SPARSE_VEC_INVALID_INDEX;
1969 * Make sure we don't wipe out an ethernet registration by mistake
1970 * Can happen if init function ordering constraints are missing.
1974 ethernet_main_t *em = ðernet_main;
1975 ASSERT (em->next_by_ethertype_register_called == 0);
1981 // Add an ethertype -> next index mapping to the structure
1983 next_by_ethertype_register (next_by_ethertype_t * l3_next,
1984 u32 ethertype, u32 next_index)
1988 ethernet_main_t *em = ðernet_main;
1992 ethernet_main_t *em = ðernet_main;
1993 em->next_by_ethertype_register_called = 1;
1996 /* Setup ethernet type -> next index sparse vector mapping. */
1997 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2000 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2002 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
2003 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2005 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2007 // do not allow the cached next index's to be updated if L3
2008 // redirect is enabled, as it will have overwritten them
2009 if (!em->redirect_l3)
2011 // Cache common ethertypes directly
2012 if (ethertype == ETHERNET_TYPE_IP4)
2014 l3_next->input_next_ip4 = next_index;
2016 else if (ethertype == ETHERNET_TYPE_IP6)
2018 l3_next->input_next_ip6 = next_index;
2020 else if (ethertype == ETHERNET_TYPE_MPLS)
2022 l3_next->input_next_mpls = next_index;
2029 ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2031 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2032 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2034 ethernet_setup_node (vm, ethernet_input_node.index);
2035 ethernet_setup_node (vm, ethernet_input_type_node.index);
2036 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2038 next_by_ethertype_init (&em->l3_next);
2040 // Initialize pools and vector for vlan parsing
2041 vec_validate (em->main_intfs, 10); // 10 main interfaces
2042 pool_alloc (em->vlan_pool, 10);
2043 pool_alloc (em->qinq_pool, 1);
2045 // The first vlan pool will always be reserved for an invalid table
2046 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2047 // The first qinq pool will always be reserved for an invalid table
2048 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
2052 ethernet_register_input_type (vlib_main_t * vm,
2053 ethernet_type_t type, u32 node_index)
2055 ethernet_main_t *em = ðernet_main;
2056 ethernet_type_info_t *ti;
2060 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2062 clib_error_report (error);
2065 ti = ethernet_get_type_info (em, type);
2066 ti->node_index = node_index;
2067 ti->next_index = vlib_node_add_next (vm,
2068 ethernet_input_node.index, node_index);
2069 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2070 ASSERT (i == ti->next_index);
2072 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2073 ASSERT (i == ti->next_index);
2075 // Add the L3 node for this ethertype to the next nodes structure
2076 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2078 // Call the registration functions for other nodes that want a mapping
2079 l2bvi_register_input_type (vm, type, node_index);
2083 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
2085 ethernet_main_t *em = ðernet_main;
2089 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2092 * Even if we never use these arcs, we have to align the next indices...
2094 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2096 ASSERT (i == em->l2_next);
2098 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2099 ASSERT (i == em->l2_next);
2102 // Register a next node for L3 redirect, and enable L3 redirect
2104 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
2106 ethernet_main_t *em = ðernet_main;
2109 em->redirect_l3 = 1;
2110 em->redirect_l3_next = vlib_node_add_next (vm,
2111 ethernet_input_node.index,
2114 * Change the cached next nodes to the redirect node
2116 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2117 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2118 em->l3_next.input_next_mpls = em->redirect_l3_next;
2121 * Even if we never use these arcs, we have to align the next indices...
2123 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2125 ASSERT (i == em->redirect_l3_next);
2127 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2129 ASSERT (i == em->redirect_l3_next);
2134 * fd.io coding-style-patch-verification: ON
2137 * eval: (c-set-style "gnu")