2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
48 #define foreach_ethernet_input_next \
49 _ (PUNT, "error-punt") \
50 _ (DROP, "error-drop") \
51 _ (LLC, "llc-input") \
52 _ (IP4_INPUT, "ip4-input") \
53 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
/* Next-node indices (ETHERNET_INPUT_NEXT_PUNT, _DROP, _LLC, ...) generated
   from the list above; ETHERNET_INPUT_N_NEXT counts the fixed entries.
   NOTE(review): the enum's opening "typedef enum {" line is not visible in
   this sampled listing. */
57 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
58 foreach_ethernet_input_next
60 ETHERNET_INPUT_N_NEXT,
61 } ethernet_input_next_t;
/* Per-packet trace record: the input frame's scalar data (hw/sw if-index)
   plus, per the formatter below, frame flags and leading packet bytes.
   NOTE(review): the struct's opening lines and other members are not
   visible in this sampled listing. */
67 ethernet_input_frame_t frame_data;
68 } ethernet_input_trace_t;
/* Trace formatter for ethernet-input: prints the frame flags, the frame's
   hw/sw if-index when the frame carries a single sw_if_index, then the
   parsed ethernet header of the traced packet on a new indented line. */
71 format_ethernet_input_trace (u8 * s, va_list * va)
73 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
74 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
75 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
76 u32 indent = format_get_indent (s);
80 s = format (s, "frame: flags 0x%x", t->frame_flags);
81 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
82 s = format (s, ", hw-if-index %u, sw-if-index %u",
83 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
/* continue on a fresh line at the same indentation */
84 s = format (s, "\n%U", format_white_space, indent);
86 s = format (s, "%U", format_ethernet_header, t->packet_data);
91 extern vlib_node_registration_t ethernet_input_node;
/* Input variants: plain ethernet header, ethertype-only (entered after
   LLC/SNAP processing stripped the header), and NOT_L2 (ethernet header
   present but L2 forwarding is forced off).
   NOTE(review): the enum's opening "typedef enum {" line is not visible
   in this sampled listing. */
95 ETHERNET_INPUT_VARIANT_ETHERNET,
96 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
97 ETHERNET_INPUT_VARIANT_NOT_L2,
98 } ethernet_input_variant_t;
101 // Parse the ethernet header to extract vlan tags and innermost ethertype
/* Outputs via pointers: innermost ethertype in host byte order, outer and
   inner VLAN ids (low 12 bits of each tag), and subint match_flags
   recording how many tags were seen.  Advances b0 past each header it
   consumes and stores the tag count into the buffer metadata.
   NOTE(review): sampled listing — braces and some declarations (e.g. of
   `tag`, `vlan_count`) are not visible here. */
102 static_always_inline void
103 parse_header (ethernet_input_variant_t variant,
107 u16 * outer_id, u16 * inner_id, u32 * match_flags)
111 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
112 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
114 ethernet_header_t *e0;
/* full ethernet header present: record where L2 starts before advancing */
116 e0 = (void *) (b0->data + b0->current_data);
118 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
119 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
121 vlib_buffer_advance (b0, sizeof (e0[0]));
123 *type = clib_net_to_host_u16 (e0->type);
125 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
127 // here when prior node was LLC/SNAP processing
130 e0 = (void *) (b0->data + b0->current_data);
132 vlib_buffer_advance (b0, sizeof (e0[0]));
134 *type = clib_net_to_host_u16 (e0[0]);
137 // save for distinguishing between dot1q and dot1ad later
140 // default the tags to 0 (used if there is no corresponding tag)
144 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
147 // check for vlan encaps
148 if (ethernet_frame_is_tagged (*type))
150 ethernet_vlan_header_t *h0;
153 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
155 h0 = (void *) (b0->data + b0->current_data);
157 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
/* VLAN id is the low 12 bits; priority/CFI bits are dropped here */
159 *outer_id = tag & 0xfff;
161 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
163 *type = clib_net_to_host_u16 (h0->type);
165 vlib_buffer_advance (b0, sizeof (h0[0]));
168 if (*type == ETHERNET_TYPE_VLAN)
170 // Double tagged packet
171 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
173 h0 = (void *) (b0->data + b0->current_data);
175 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
177 *inner_id = tag & 0xfff;
179 *type = clib_net_to_host_u16 (h0->type);
181 vlib_buffer_advance (b0, sizeof (h0[0]));
183 if (*type == ETHERNET_TYPE_VLAN)
185 // More than double tagged packet
186 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
188 vlib_buffer_advance (b0, sizeof (h0[0]));
189 vlan_count = 3; // "unknown" number, aka, 3-or-more
193 ethernet_buffer_set_vlan_count (b0, vlan_count);
196 // Determine the subinterface for this packet, given the result of the
197 // vlan table lookups and vlan header parsing. Check the most specific
/* Delegates the table match to eth_identify_subint, then applies the L3
   my-mac filter and finally rewrites *error0 when no subinterface is up.
   NOTE(review): sampled listing — some parameter/brace lines missing. */
199 static_always_inline void
200 identify_subint (vnet_hw_interface_t * hi,
203 main_intf_t * main_intf,
204 vlan_intf_t * vlan_intf,
205 qinq_intf_t * qinq_intf,
206 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
210 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
211 qinq_intf, new_sw_if_index, error0, is_l2);
216 // Perform L3 my-mac filter
217 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
218 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
221 ethernet_header_t *e0;
222 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
/* only unicast dmacs are filtered; multicast/broadcast pass through */
224 if (!(ethernet_address_cast (e0->dst_address)))
226 if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
228 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
233 // Check for down subinterface
234 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
/* Choose *next0 for one packet: drop on error, l2_next for L2 interfaces
   (rewinding the buffer so the L2 header is preserved), cached next nodes
   for IP4/IP6/MPLS, the redirect node when L3 redirect is enabled, else a
   sparse-vector lookup by ethertype with an LLC fallback for types < 0x600. */
238 static_always_inline void
239 determine_next_node (ethernet_main_t * em,
240 ethernet_input_variant_t variant,
242 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
244 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
245 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
247 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
249 // some error occurred
250 *next0 = ETHERNET_INPUT_NEXT_DROP;
254 // record the L2 len and reset the buffer so the L2 header is preserved
255 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
256 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
257 *next0 = em->l2_next;
258 ASSERT (vnet_buffer (b0)->l2.l2_len ==
259 ethernet_buffer_header_size (b0));
260 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
262 // check for common IP/MPLS ethertypes
264 else if (type0 == ETHERNET_TYPE_IP4)
266 *next0 = em->l3_next.input_next_ip4;
268 else if (type0 == ETHERNET_TYPE_IP6)
270 *next0 = em->l3_next.input_next_ip6;
272 else if (type0 == ETHERNET_TYPE_MPLS)
274 *next0 = em->l3_next.input_next_mpls;
277 else if (em->redirect_l3)
279 // L3 Redirect is on, the cached common next nodes will be
280 // pointing to the redirect node, catch the uncommon types here
281 *next0 = em->redirect_l3_next;
285 // uncommon ethertype, check table
287 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
288 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
291 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
293 // The table is not populated with LLC values, so check that now.
294 // If variant is variant_ethernet_type then we came from LLC processing. Don't
295 // go back there; drop instead by keeping the drop/bad table result.
296 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
298 *next0 = ETHERNET_INPUT_NEXT_LLC;
304 /* following vector code relies on following assumptions */
/* the SIMD gather/scatter path below loads the first 64 bits of each
   vlib_buffer_t and stores l2/l3 offsets pairwise, so the exact field
   layout asserted here is load-bearing */
305 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
308 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
309 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
310 "l3_hdr_offset must follow l2_hdr_offset");
/* For 4 buffers at once: set l2_hdr_offset/l3_hdr_offset, mark both
   offsets valid in flags, and (when is_l3) advance past the ethernet
   header; in L2 mode the buffer stays at the header and l2.l2_len is set.
   Has an AVX2 (CLIB_HAVE_VEC256) fast path and a scalar fallback.
   NOTE(review): sampled listing — #else/#endif and some declaration
   lines are not visible here. */
312 static_always_inline void
313 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
315 i16 adv = sizeof (ethernet_header_t);
316 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
317 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
319 #ifdef CLIB_HAVE_VEC256
320 /* to reduce number of small loads/stores we are loading first 64 bits
321 of each buffer metadata into 256-bit register so we can advance
322 current_data, current_length and flags.
323 Observed saving of this code is ~2 clocks per packet */
326 /* vector of signed 16 bit integers used in signed vector add operation
327 to advance current_data and current_length */
328 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
330 adv, -adv, 0, 0, adv, -adv, 0, 0,
331 adv, -adv, 0, 0, adv, -adv, 0, 0
334 /* load 4 x 64 bits */
335 r = u64x4_gather (b[0], b[1], b[2], b[3]);
/* +adv to current_data, -adv to current_length, via i16 lane add */
341 radv = (u64x4) ((i16x16) r + adv4);
343 /* write 4 x 64 bits */
344 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
346 /* use old current_data as l2_hdr_offset and new current_data as
348 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
350 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
351 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
352 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
/* debug-build sanity checks that the SIMD path matched scalar semantics */
358 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
359 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
360 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
361 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
363 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
364 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
365 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
366 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
370 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
371 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
372 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
373 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
375 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
376 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
377 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
378 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
/* scalar fallback path */
382 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
383 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
384 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
385 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
386 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
387 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
388 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
389 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
393 vlib_buffer_advance (b[0], adv);
394 vlib_buffer_advance (b[1], adv);
395 vlib_buffer_advance (b[2], adv);
396 vlib_buffer_advance (b[3], adv);
399 b[0]->flags |= flags;
400 b[1]->flags |= flags;
401 b[2]->flags |= flags;
402 b[3]->flags |= flags;
407 vnet_buffer (b[0])->l2.l2_len = adv;
408 vnet_buffer (b[1])->l2.l2_len = adv;
409 vnet_buffer (b[2])->l2.l2_len = adv;
410 vnet_buffer (b[3])->l2.l2_len = adv;
/* Single-buffer variant of eth_input_adv_and_flags_x4: set l2/l3 header
   offsets and validity flags; advance past the ethernet header in L3
   mode, else record l2.l2_len. */
414 static_always_inline void
415 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
417 i16 adv = sizeof (ethernet_header_t);
418 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
419 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
421 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
422 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
425 vlib_buffer_advance (b[0], adv);
426 b[0]->flags |= flags;
428 vnet_buffer (b[0])->l2.l2_len = adv;
/* Extract one packet's ethertype (network byte order, as stored), the
   8 bytes following the header (candidate VLAN tag words), and — when
   dmac_check is set — the first 8 bytes starting at the dmac, into the
   per-frame etype/tags/dmacs arrays at position `offset`. */
432 static_always_inline void
433 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
434 u64 * dmacs, int offset, int dmac_check)
436 ethernet_header_t *e;
437 e = vlib_buffer_get_current (b[offset]);
438 #ifdef CLIB_HAVE_VEC128
/* one 16-byte load covering dmac..type; lane 3 of the u16x8 view is type */
439 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
440 etype[offset] = ((u16x8) r)[3];
443 etype[offset] = e->type;
444 tags[offset] = *(u64 *) (e + 1);
448 dmacs[offset] = *(u64 *) e;
/* Map a host-order ethertype to a next-node index: values below 0x600
   are 802.3 lengths and go to LLC; otherwise consult the sparse
   ethertype-to-next-node table.
   NOTE(review): "ðernet_main" below looks like mojibake for
   "&ethernet_main" (HTML-entity corruption of "&eth") — fix encoding. */
451 static_always_inline u16
452 eth_input_next_by_type (u16 etype)
454 ethernet_main_t *em = ðernet_main;
456 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
457 vec_elt (em->l3_next.input_next_by_type,
458 sparse_vec_index (em->l3_next.input_next_by_type, etype));
/* Cached result of the last tag lookup (tag/mask/next/err/sw_if_index/...)
   plus packet/byte counters accumulated until flushed by
   eth_input_update_if_counters.
   NOTE(review): the struct's opening lines and remaining members are not
   visible in this sampled listing. */
468 u64 n_packets, n_bytes;
469 } eth_input_tag_lookup_t;
/* Flush the rx counters accumulated in an eth_input_tag_lookup_t onto
   the per-sw-interface combined counters; no-op when nothing was counted
   or no interface was resolved. */
471 static_always_inline void
472 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
473 eth_input_tag_lookup_t * l)
475 if (l->n_packets == 0 || l->sw_if_index == ~0)
/* n_bytes accumulated payload bytes; add the per-packet header length */
479 l->n_bytes += l->n_packets * l->len;
481 vlib_increment_combined_counter
482 (vnm->interface_main.combined_sw_if_counters +
483 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
484 l->n_packets, l->n_bytes);
/* Slow-path handler for one tagged packet: resolve the (dot1q or dot1ad)
   subinterface from the 64-bit tag word, caching the result in `l` so
   consecutive packets with the same tag skip the table lookups; then set
   next/error, adjust the buffer, and account rx counters.
   NOTE(review): sampled listing — braces and some declarations/arguments
   are missing; "ðernet_main" looks like mojibake for "&ethernet_main". */
487 static_always_inline void
488 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
489 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
490 u64 tag, u16 * next, vlib_buffer_t * b,
491 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
492 int main_is_l3, int check_dmac)
494 ethernet_main_t *em = ðernet_main;
/* cache miss: tag differs from last packet under the active mask */
496 if ((tag ^ l->tag) & l->mask)
498 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
501 vlan_table_t *vlan_table;
502 qinq_table_t *qinq_table;
/* tag word layout: t[0]=outer vlan, t[1]=ethertype, t[2]=inner vlan, t[3]=inner ethertype */
503 u16 *t = (u16 *) & tag;
504 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
505 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
506 u32 matched, is_l2, new_sw_if_index;
508 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
509 mif->dot1ad_vlans : mif->dot1q_vlans);
510 vif = &vlan_table->vlans[vlan1];
511 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
512 qif = &qinq_table->vlans[vlan2];
513 l->err = ETHERNET_ERROR_NONE;
514 l->type = clib_net_to_host_u16 (t[1]);
516 if (l->type == ETHERNET_TYPE_VLAN)
/* double-tagged: innermost ethertype follows the second tag */
518 l->type = clib_net_to_host_u16 (t[3]);
520 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
521 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
522 qif, &new_sw_if_index, &l->err,
530 new_sw_if_index = hi->sw_if_index;
531 l->err = ETHERNET_ERROR_NONE;
533 is_l2 = main_is_l3 == 0;
536 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
537 SUBINT_CONFIG_MATCH_1_TAG, mif,
538 vif, qif, &new_sw_if_index,
/* interface changed: flush counters accumulated for the previous one */
542 if (l->sw_if_index != new_sw_if_index)
544 eth_input_update_if_counters (vm, vnm, l);
547 l->sw_if_index = new_sw_if_index;
/* cache mask: compare both tags for qinq, only the first word otherwise */
550 l->mask = (l->n_tags == 2) ?
551 clib_net_to_host_u64 (0xffffffffffffffff) :
552 clib_net_to_host_u64 (0xffffffff00000000);
554 if (matched && l->sw_if_index == ~0)
555 l->err = ETHERNET_ERROR_DOWN;
557 l->len = sizeof (ethernet_header_t) +
558 l->n_tags * sizeof (ethernet_vlan_header_t);
/* L2: rewind to the ethernet header; L3: skip over the vlan tags */
560 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
561 l->n_tags * sizeof (ethernet_vlan_header_t);
563 l->adv = is_l2 ? 0 : l->len;
565 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
566 l->next = ETHERNET_INPUT_NEXT_DROP;
568 l->next = em->l2_next;
569 else if (l->type == ETHERNET_TYPE_IP4)
570 l->next = em->l3_next.input_next_ip4;
571 else if (l->type == ETHERNET_TYPE_IP6)
572 l->next = em->l3_next.input_next_ip6;
573 else if (l->type == ETHERNET_TYPE_MPLS)
574 l->next = em->l3_next.input_next_mpls;
575 else if (em->redirect_l3)
576 l->next = em->redirect_l3_next;
579 l->next = eth_input_next_by_type (l->type);
580 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
581 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
/* L3 subinterface (adv > 0) with a failed my-mac check: punt */
585 if (check_dmac && l->adv > 0 && dmac_bad)
587 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
588 next[0] = ETHERNET_INPUT_NEXT_PUNT;
593 vlib_buffer_advance (b, l->adv);
594 vnet_buffer (b)->l2.l2_len = l->len;
595 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
597 if (l->err == ETHERNET_ERROR_NONE)
599 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
600 ethernet_buffer_set_vlan_count (b, l->n_tags);
603 b->error = node->errors[l->err];
605 /* update counters */
607 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
610 /* process frame of buffers, store ethertype into array and update
611 buffer metadata fields depending on interface being l2 or l3 assuming that
612 packets are untagged. For tagged packets those fields are updated later.
613 Optionally store Destination MAC address and tag data into arrays
614 for further processing */
/* The vector loops below walk the frame 8 packets at a time, so the frame
   size must divide evenly by 8.  (The condition checks divisibility, so
   the diagnostic says "multiple", not "power", of 8.) */
616 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
617 "VLIB_FRAME_SIZE must be a multiple of 8");
/* Process a full frame of buffers from one interface: extract ethertypes
   (and optionally dmacs/tags), set buffer metadata assuming untagged
   packets, optionally vector-check dmacs against the interface address,
   classify packets into fastpath next nodes (IP4/IP6/MPLS or L2) or a
   slowpath index list handled per-packet (vlan/dot1ad/unknown type),
   then enqueue everything.
   NOTE(review): sampled listing — loop headers, braces, several
   declarations and the scalar #else branches are not all visible here;
   "ðernet_main" looks like mojibake for "&ethernet_main". */
618 static_always_inline void
619 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
620 vnet_hw_interface_t * hi,
621 u32 * buffer_indices, u32 n_packets, int main_is_l3,
622 int ip4_cksum_ok, int dmac_check)
624 ethernet_main_t *em = ðernet_main;
625 u16 nexts[VLIB_FRAME_SIZE], *next;
626 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
627 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
628 u8 dmacs_bad[VLIB_FRAME_SIZE];
629 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
630 u16 slowpath_indices[VLIB_FRAME_SIZE];
632 u16 next_ip4, next_ip6, next_mpls, next_l2;
633 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
634 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
635 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
636 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
637 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
638 i32 n_left = n_packets;
639 vlib_buffer_t *b[20];
642 from = buffer_indices;
/* main quad loop with two-stage prefetch: ph = headers 16 ahead,
   pd = packet data 8 ahead */
646 vlib_buffer_t **ph = b + 16, **pd = b + 8;
647 vlib_get_buffers (vm, from, b, 4);
648 vlib_get_buffers (vm, from + 8, pd, 4);
649 vlib_get_buffers (vm, from + 16, ph, 4);
651 vlib_prefetch_buffer_header (ph[0], LOAD);
652 vlib_prefetch_buffer_data (pd[0], LOAD);
653 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
655 vlib_prefetch_buffer_header (ph[1], LOAD);
656 vlib_prefetch_buffer_data (pd[1], LOAD);
657 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
659 vlib_prefetch_buffer_header (ph[2], LOAD);
660 vlib_prefetch_buffer_data (pd[2], LOAD);
661 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
663 vlib_prefetch_buffer_header (ph[3], LOAD);
664 vlib_prefetch_buffer_data (pd[3], LOAD);
665 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
667 eth_input_adv_and_flags_x4 (b, main_is_l3);
/* quad loop without prefetch for the tail */
678 vlib_get_buffers (vm, from, b, 4);
679 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
680 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
681 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
682 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
683 eth_input_adv_and_flags_x4 (b, main_is_l3);
/* single loop for the remainder */
694 vlib_get_buffers (vm, from, b, 1);
695 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
696 eth_input_adv_and_flags_x1 (b, main_is_l3);
/* dmac filter: mark packets whose unicast dmac differs from the
   interface address (I/G bit clear means unicast) */
708 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
709 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
710 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
712 u8 *dmac_bad = dmacs_bad;
716 #ifdef CLIB_HAVE_VEC256
717 u64x4 igbit4 = u64x4_splat (igbit);
718 u64x4 mask4 = u64x4_splat (mask);
719 u64x4 hwaddr4 = u64x4_splat (hwaddr);
723 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
724 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
726 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
727 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
729 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
730 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
/* scalar dmac-check fallback */
746 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
747 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
748 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
749 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
764 next_ip4 = em->l3_next.input_next_ip4;
765 next_ip6 = em->l3_next.input_next_ip6;
766 next_mpls = em->l3_next.input_next_mpls;
767 next_l2 = em->l2_next;
/* device validated the ip4 checksum: skip the checksum node */
769 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
770 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
772 #ifdef CLIB_HAVE_VEC256
773 u16x16 et16_ip4 = u16x16_splat (et_ip4);
774 u16x16 et16_ip6 = u16x16_splat (et_ip6);
775 u16x16 et16_mpls = u16x16_splat (et_mpls);
776 u16x16 et16_vlan = u16x16_splat (et_vlan);
777 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
778 u16x16 next16_ip4 = u16x16_splat (next_ip4);
779 u16x16 next16_ip6 = u16x16_splat (next_ip6);
780 u16x16 next16_mpls = u16x16_splat (next_mpls);
781 u16x16 next16_l2 = u16x16_splat (next_l2);
783 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
792 /* fastpath - in l3 mode handles ip4, ip6 and mpls packets, other packets
793 are considered as slowpath, in l2 mode all untagged packets are
794 considered as fastpath */
797 #ifdef CLIB_HAVE_VEC256
/* classify 16 ethertypes at once; lanes left at zero are slowpath */
801 u16x16 e16 = u16x16_load_unaligned (etype);
804 r += (e16 == et16_ip4) & next16_ip4;
805 r += (e16 == et16_ip6) & next16_ip6;
806 r += (e16 == et16_mpls) & next16_mpls;
/* L2 mode: anything that is not a vlan/dot1ad tag goes to l2_next */
809 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
810 u16x16_store_unaligned (r, next);
/* mixed lanes: record which indices need slowpath handling */
812 if (!u16x16_is_all_zero (r == zero))
814 if (u16x16_is_all_zero (r))
816 u16x16_store_unaligned (u16x16_splat (i) + stairs,
817 slowpath_indices + n_slowpath);
822 for (int j = 0; j < 16; j++)
824 slowpath_indices[n_slowpath++] = i + j;
/* scalar classification fallback */
835 if (main_is_l3 && etype[0] == et_ip4)
837 else if (main_is_l3 && etype[0] == et_ip6)
839 else if (main_is_l3 && etype[0] == et_mpls)
841 else if (main_is_l3 == 0 &&
842 etype[0] != et_vlan && etype[0] != et_dot1ad)
847 slowpath_indices[n_slowpath++] = i;
/* slowpath: tagged packets via cached tag lookups, unknown ethertypes
   via the sparse table (memoizing the last unknown type) */
858 vnet_main_t *vnm = vnet_get_main ();
860 u16 *si = slowpath_indices;
861 u32 last_unknown_etype = ~0;
862 u32 last_unknown_next = ~0;
863 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
865 .tag = tags[si[0]] ^ -1LL,
869 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
874 u16 etype = etypes[i];
876 if (etype == et_vlan)
878 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
879 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
880 &dot1q_lookup, dmacs_bad[i], 0,
881 main_is_l3, dmac_check);
884 else if (etype == et_dot1ad)
886 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
887 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
888 &dot1ad_lookup, dmacs_bad[i], 1,
889 main_is_l3, dmac_check);
893 /* untagged packet with not well known ethertype */
894 if (last_unknown_etype != etype)
896 last_unknown_etype = etype;
897 etype = clib_host_to_net_u16 (etype);
898 last_unknown_next = eth_input_next_by_type (etype);
900 if (dmac_check && main_is_l3 && dmacs_bad[i])
902 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
903 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
904 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
907 nexts[i] = last_unknown_next;
/* flush rx counters accumulated during tag lookups */
915 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
916 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
919 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
/* Entry point for a frame from a single interface: look up the main
   interface's untagged-subinterface config once and dispatch to
   eth_input_process_frame with compile-time-specialized is_l3/dmac_check
   flags.
   NOTE(review): sampled listing — braces and the is_l3/flags argument
   lines are not all visible; "ðernet_main" looks like mojibake for
   "&ethernet_main". */
922 static_always_inline void
923 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
924 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
927 ethernet_main_t *em = ðernet_main;
928 ethernet_interface_t *ei;
929 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
930 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
931 subint_config_t *subint0 = &intf0->untagged_subint;
933 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
934 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
938 /* main interface is L3, we don't expect tagged packets and interface
939 is not in promisc mode, so we don't need to check DMAC */
943 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
946 /* subinterfaces and promisc mode so DMAC check is needed */
947 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
953 /* untagged packets are treated as L2 */
955 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
/* Per-frame tracing helper: when the node has tracing enabled, copy the
   leading packet bytes and the frame's scalar data into a trace record
   for each traced buffer; separately feed buffers to the rx pcap capture
   when that is enabled (optionally filtered by sw_if_index). */
961 static_always_inline void
962 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
963 vlib_frame_t * from_frame)
966 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
968 from = vlib_frame_vector_args (from_frame);
969 n_left = from_frame->n_vectors;
973 ethernet_input_trace_t *t0;
974 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
976 if (b0->flags & VLIB_BUFFER_IS_TRACED)
978 t0 = vlib_add_trace (vm, node, b0,
979 sizeof (ethernet_input_trace_t));
980 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
981 sizeof (t0->packet_data));
982 t0->frame_flags = from_frame->flags;
983 clib_memcpy_fast (&t0->frame_data,
984 vlib_frame_scalar_args (from_frame),
985 sizeof (ethernet_input_frame_t));
992 /* rx pcap capture if enabled */
993 if (PREDICT_FALSE (vm->pcap[VLIB_RX].pcap_enable))
997 from = vlib_frame_vector_args (from_frame);
998 n_left = from_frame->n_vectors;
1004 b0 = vlib_get_buffer (vm, bi0);
/* sw_if_index 0 means "capture on all interfaces" */
1006 if (vm->pcap[VLIB_RX].pcap_sw_if_index == 0 ||
1007 vm->pcap[VLIB_RX].pcap_sw_if_index
1008 == vnet_buffer (b0)->sw_if_index[VLIB_RX])
1010 pcap_add_buffer (&vm->pcap[VLIB_RX].pcap_main, vm, bi0, 512);
1017 static_always_inline void
1018 ethernet_input_inline (vlib_main_t * vm,
1019 vlib_node_runtime_t * node,
1020 u32 * from, u32 n_packets,
1021 ethernet_input_variant_t variant)
1023 vnet_main_t *vnm = vnet_get_main ();
1024 ethernet_main_t *em = ðernet_main;
1025 vlib_node_runtime_t *error_node;
1026 u32 n_left_from, next_index, *to_next;
1027 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1028 u32 thread_index = vm->thread_index;
1029 u32 cached_sw_if_index = ~0;
1030 u32 cached_is_l2 = 0; /* shut up gcc */
1031 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1033 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1034 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1038 n_left_from = n_packets;
1040 next_index = node->cached_next_index;
1041 stats_sw_if_index = node->runtime_data[0];
1042 stats_n_packets = stats_n_bytes = 0;
1044 while (n_left_from > 0)
1048 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1050 while (n_left_from >= 4 && n_left_to_next >= 2)
1053 vlib_buffer_t *b0, *b1;
1054 u8 next0, next1, error0, error1;
1055 u16 type0, orig_type0, type1, orig_type1;
1056 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1057 u32 match_flags0, match_flags1;
1058 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1059 new_sw_if_index1, len1;
1060 vnet_hw_interface_t *hi0, *hi1;
1061 main_intf_t *main_intf0, *main_intf1;
1062 vlan_intf_t *vlan_intf0, *vlan_intf1;
1063 qinq_intf_t *qinq_intf0, *qinq_intf1;
1065 ethernet_header_t *e0, *e1;
1067 /* Prefetch next iteration. */
1069 vlib_buffer_t *b2, *b3;
1071 b2 = vlib_get_buffer (vm, from[2]);
1072 b3 = vlib_get_buffer (vm, from[3]);
1074 vlib_prefetch_buffer_header (b2, STORE);
1075 vlib_prefetch_buffer_header (b3, STORE);
1077 CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD);
1078 CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD);
1087 n_left_to_next -= 2;
1090 b0 = vlib_get_buffer (vm, bi0);
1091 b1 = vlib_get_buffer (vm, bi1);
1093 error0 = error1 = ETHERNET_ERROR_NONE;
1094 e0 = vlib_buffer_get_current (b0);
1095 type0 = clib_net_to_host_u16 (e0->type);
1096 e1 = vlib_buffer_get_current (b1);
1097 type1 = clib_net_to_host_u16 (e1->type);
1099 /* Set the L2 header offset for all packets */
1100 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1101 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1102 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1103 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1105 /* Speed-path for the untagged case */
1106 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1107 && !ethernet_frame_is_any_tagged_x2 (type0,
1111 subint_config_t *subint0;
1112 u32 sw_if_index0, sw_if_index1;
1114 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1115 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1116 is_l20 = cached_is_l2;
1118 /* This is probably wholly unnecessary */
1119 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1122 /* Now sw_if_index0 == sw_if_index1 */
1123 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1125 cached_sw_if_index = sw_if_index0;
1126 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1127 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1128 subint0 = &intf0->untagged_subint;
1129 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1132 if (PREDICT_TRUE (is_l20 != 0))
1134 vnet_buffer (b0)->l3_hdr_offset =
1135 vnet_buffer (b0)->l2_hdr_offset +
1136 sizeof (ethernet_header_t);
1137 vnet_buffer (b1)->l3_hdr_offset =
1138 vnet_buffer (b1)->l2_hdr_offset +
1139 sizeof (ethernet_header_t);
1140 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1141 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1142 next0 = em->l2_next;
1143 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1144 next1 = em->l2_next;
1145 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1149 if (!ethernet_address_cast (e0->dst_address) &&
1150 (hi->hw_address != 0) &&
1151 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1152 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1153 if (!ethernet_address_cast (e1->dst_address) &&
1154 (hi->hw_address != 0) &&
1155 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1156 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1157 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1158 determine_next_node (em, variant, 0, type0, b0,
1160 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1161 determine_next_node (em, variant, 0, type1, b1,
1167 /* Slow-path for the tagged case */
1169 parse_header (variant,
1172 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1174 parse_header (variant,
1177 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1179 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1180 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1182 eth_vlan_table_lookups (em,
1189 &main_intf0, &vlan_intf0, &qinq_intf0);
1191 eth_vlan_table_lookups (em,
1198 &main_intf1, &vlan_intf1, &qinq_intf1);
1200 identify_subint (hi0,
1205 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1207 identify_subint (hi1,
1212 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1214 // Save RX sw_if_index for later nodes
1215 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1217 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1218 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1220 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1222 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1223 if (((new_sw_if_index0 != ~0)
1224 && (new_sw_if_index0 != old_sw_if_index0))
1225 || ((new_sw_if_index1 != ~0)
1226 && (new_sw_if_index1 != old_sw_if_index1)))
1229 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1230 - vnet_buffer (b0)->l2_hdr_offset;
1231 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1232 - vnet_buffer (b1)->l2_hdr_offset;
1234 stats_n_packets += 2;
1235 stats_n_bytes += len0 + len1;
1238 (!(new_sw_if_index0 == stats_sw_if_index
1239 && new_sw_if_index1 == stats_sw_if_index)))
1241 stats_n_packets -= 2;
1242 stats_n_bytes -= len0 + len1;
1244 if (new_sw_if_index0 != old_sw_if_index0
1245 && new_sw_if_index0 != ~0)
1246 vlib_increment_combined_counter (vnm->
1247 interface_main.combined_sw_if_counters
1249 VNET_INTERFACE_COUNTER_RX,
1251 new_sw_if_index0, 1,
1253 if (new_sw_if_index1 != old_sw_if_index1
1254 && new_sw_if_index1 != ~0)
1255 vlib_increment_combined_counter (vnm->
1256 interface_main.combined_sw_if_counters
1258 VNET_INTERFACE_COUNTER_RX,
1260 new_sw_if_index1, 1,
1263 if (new_sw_if_index0 == new_sw_if_index1)
1265 if (stats_n_packets > 0)
1267 vlib_increment_combined_counter
1268 (vnm->interface_main.combined_sw_if_counters
1269 + VNET_INTERFACE_COUNTER_RX,
1272 stats_n_packets, stats_n_bytes);
1273 stats_n_packets = stats_n_bytes = 0;
1275 stats_sw_if_index = new_sw_if_index0;
1280 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1281 is_l20 = is_l21 = 0;
1283 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1285 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1289 b0->error = error_node->errors[error0];
1290 b1->error = error_node->errors[error1];
1292 // verify speculative enqueue
1293 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1294 n_left_to_next, bi0, bi1, next0,
1298 while (n_left_from > 0 && n_left_to_next > 0)
1303 u16 type0, orig_type0;
1304 u16 outer_id0, inner_id0;
1306 u32 old_sw_if_index0, new_sw_if_index0, len0;
1307 vnet_hw_interface_t *hi0;
1308 main_intf_t *main_intf0;
1309 vlan_intf_t *vlan_intf0;
1310 qinq_intf_t *qinq_intf0;
1311 ethernet_header_t *e0;
1314 // Prefetch next iteration
1315 if (n_left_from > 1)
1319 p2 = vlib_get_buffer (vm, from[1]);
1320 vlib_prefetch_buffer_header (p2, STORE);
1321 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
1329 n_left_to_next -= 1;
1331 b0 = vlib_get_buffer (vm, bi0);
1333 error0 = ETHERNET_ERROR_NONE;
1334 e0 = vlib_buffer_get_current (b0);
1335 type0 = clib_net_to_host_u16 (e0->type);
1337 /* Set the L2 header offset for all packets */
1338 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1339 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1341 /* Speed-path for the untagged case */
1342 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1343 && !ethernet_frame_is_tagged (type0)))
1346 subint_config_t *subint0;
1349 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1350 is_l20 = cached_is_l2;
1352 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1354 cached_sw_if_index = sw_if_index0;
1355 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1356 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1357 subint0 = &intf0->untagged_subint;
1358 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1362 if (PREDICT_TRUE (is_l20 != 0))
1364 vnet_buffer (b0)->l3_hdr_offset =
1365 vnet_buffer (b0)->l2_hdr_offset +
1366 sizeof (ethernet_header_t);
1367 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1368 next0 = em->l2_next;
1369 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1373 if (!ethernet_address_cast (e0->dst_address) &&
1374 (hi->hw_address != 0) &&
1375 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1376 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1377 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1378 determine_next_node (em, variant, 0, type0, b0,
1384 /* Slow-path for the tagged case */
1385 parse_header (variant,
1388 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1390 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1392 eth_vlan_table_lookups (em,
1399 &main_intf0, &vlan_intf0, &qinq_intf0);
1401 identify_subint (hi0,
1406 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1408 // Save RX sw_if_index for later nodes
1409 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1411 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1413 // Increment subinterface stats
1414 // Note that interface-level counters have already been incremented
1415 // prior to calling this function. Thus only subinterface counters
1416 // are incremented here.
1418 // Interface level counters include packets received on the main
1419 // interface and all subinterfaces. Subinterface level counters
1420 // include only those packets received on that subinterface
1421 // Increment stats if the subint is valid and it is not the main intf
1422 if ((new_sw_if_index0 != ~0)
1423 && (new_sw_if_index0 != old_sw_if_index0))
1426 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1427 - vnet_buffer (b0)->l2_hdr_offset;
1429 stats_n_packets += 1;
1430 stats_n_bytes += len0;
1432 // Batch stat increments from the same subinterface so counters
1433 // don't need to be incremented for every packet.
1434 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1436 stats_n_packets -= 1;
1437 stats_n_bytes -= len0;
1439 if (new_sw_if_index0 != ~0)
1440 vlib_increment_combined_counter
1441 (vnm->interface_main.combined_sw_if_counters
1442 + VNET_INTERFACE_COUNTER_RX,
1443 thread_index, new_sw_if_index0, 1, len0);
1444 if (stats_n_packets > 0)
1446 vlib_increment_combined_counter
1447 (vnm->interface_main.combined_sw_if_counters
1448 + VNET_INTERFACE_COUNTER_RX,
1450 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1451 stats_n_packets = stats_n_bytes = 0;
1453 stats_sw_if_index = new_sw_if_index0;
1457 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1460 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1464 b0->error = error_node->errors[error0];
1466 // verify speculative enqueue
1467 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1468 to_next, n_left_to_next,
1472 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1475 // Increment any remaining batched stats
1476 if (stats_n_packets > 0)
1478 vlib_increment_combined_counter
1479 (vnm->interface_main.combined_sw_if_counters
1480 + VNET_INTERFACE_COUNTER_RX,
1481 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1482 node->runtime_data[0] = stats_sw_if_index;
/* ethernet-input node function: dispatches a frame of packets either to
 * the single-interface fast path (when the driver marked the whole frame
 * as coming from one sw_if_index) or to the generic per-packet path.
 * NOTE(review): listing has lines elided; trailing return not visible. */
1486 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1487 vlib_node_runtime_t * node,
1488 vlib_frame_t * frame)
1490 vnet_main_t *vnm = vnet_get_main ();
1491 u32 *from = vlib_frame_vector_args (frame);
1492 u32 n_packets = frame->n_vectors;
1494 ethernet_input_trace (vm, node, frame);
1496 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
// Whole frame is from one hw interface; hw_if_index and the ip4
// checksum status ride in the frame's scalar args.
1498 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1499 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1500 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1501 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
// Mixed-interface frame: shared per-packet inline path.
1504 ethernet_input_inline (vm, node, from, n_packets,
1505 ETHERNET_INPUT_VARIANT_ETHERNET);
/* ethernet-input-type node function: runs the shared inline path with
 * the ETHERNET_TYPE variant. */
1509 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1510 vlib_node_runtime_t * node,
1511 vlib_frame_t * from_frame)
1513 u32 *from = vlib_frame_vector_args (from_frame);
1514 u32 n_packets = from_frame->n_vectors;
1515 ethernet_input_trace (vm, node, from_frame);
1516 ethernet_input_inline (vm, node, from, n_packets,
1517 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
/* ethernet-input-not-l2 node function: runs the shared inline path with
 * the NOT_L2 variant (forces L3 treatment in the inline code). */
1521 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1522 vlib_node_runtime_t * node,
1523 vlib_frame_t * from_frame)
1525 u32 *from = vlib_frame_vector_args (from_frame);
1526 u32 n_packets = from_frame->n_vectors;
1527 ethernet_input_trace (vm, node, from_frame);
1528 ethernet_input_inline (vm, node, from, n_packets,
1529 ETHERNET_INPUT_VARIANT_NOT_L2);
1534 // Return the subinterface config struct for the given sw_if_index
1535 // Also return via parameter the appropriate match flags for the
1536 // configured number of tags.
1537 // On error (unsupported or not ethernet) return 0.
1538 static subint_config_t *
1539 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1541 u32 * flags, u32 * unsupported)
// FIX(review): "&ethernet_main" was mojibake'd to "ðernet_main"
// by HTML-entity corruption (&eth -> U+00F0); restored below.
1543 ethernet_main_t *em = &ethernet_main;
1544 vnet_hw_interface_t *hi;
1545 vnet_sw_interface_t *si;
1546 main_intf_t *main_intf;
1547 vlan_table_t *vlan_table;
1548 qinq_table_t *qinq_table;
1549 subint_config_t *subint = 0;
1551 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1553 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1556 goto done; // non-ethernet interface
1559 // ensure there's an entry for the main intf (shouldn't really be necessary)
1560 vec_validate (em->main_intfs, hi->hw_if_index);
1561 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1563 // Locate the subint for the given ethernet config
1564 si = vnet_get_sw_interface (vnm, sw_if_index);
1566 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1568 p2p_ethernet_main_t *p2pm = &p2p_main;
1569 u32 p2pe_sw_if_index =
1570 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1571 if (p2pe_sw_if_index == ~0)
1573 pool_get (p2pm->p2p_subif_pool, subint);
1574 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1577 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1578 *flags = SUBINT_CONFIG_P2P;
1580 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1584 pipe = pipe_get (sw_if_index);
1585 subint = &pipe->subint;
1586 *flags = SUBINT_CONFIG_P2P;
1588 else if (si->sub.eth.flags.default_sub)
// default subint catches any tag combination not matched elsewhere
1590 subint = &main_intf->default_subint;
1591 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1592 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1594 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1596 // if no flags are set then this is a main interface
1597 // so treat as untagged
1598 subint = &main_intf->untagged_subint;
1599 *flags = SUBINT_CONFIG_MATCH_0_TAG;
1604 // first get the vlan table
1605 if (si->sub.eth.flags.dot1ad)
1607 if (main_intf->dot1ad_vlans == 0)
1609 // Allocate a vlan table from the pool
1610 pool_get (em->vlan_pool, vlan_table);
// store pool index (not pointer) so pool reallocation stays safe
1611 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1615 // Get ptr to existing vlan table
1617 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1622 if (main_intf->dot1q_vlans == 0)
1624 // Allocate a vlan table from the pool
1625 pool_get (em->vlan_pool, vlan_table);
1626 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1630 // Get ptr to existing vlan table
1632 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
1636 if (si->sub.eth.flags.one_tag)
1638 *flags = si->sub.eth.flags.exact_match ?
1639 SUBINT_CONFIG_MATCH_1_TAG :
1640 (SUBINT_CONFIG_MATCH_1_TAG |
1641 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1643 if (si->sub.eth.flags.outer_vlan_id_any)
1645 // not implemented yet
1651 // a single vlan, a common case
1653 &vlan_table->vlans[si->sub.eth.
1654 outer_vlan_id].single_tag_subint;
1661 *flags = si->sub.eth.flags.exact_match ?
1662 SUBINT_CONFIG_MATCH_2_TAG :
1663 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1665 if (si->sub.eth.flags.outer_vlan_id_any
1666 && si->sub.eth.flags.inner_vlan_id_any)
1668 // not implemented yet
1673 if (si->sub.eth.flags.inner_vlan_id_any)
1675 // a specific outer and "any" inner
1676 // don't need a qinq table for this
1678 &vlan_table->vlans[si->sub.eth.
1679 outer_vlan_id].inner_any_subint;
1680 if (si->sub.eth.flags.exact_match)
1682 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1686 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1687 SUBINT_CONFIG_MATCH_3_TAG;
1692 // a specific outer + specific inner vlan id, a common case
1694 // get the qinq table
1695 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1697 // Allocate a qinq table from the pool
1698 pool_get (em->qinq_pool, qinq_table);
1699 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1700 qinq_table - em->qinq_pool;
1704 // Get ptr to existing qinq table
1706 vec_elt_at_index (em->qinq_pool,
1707 vlan_table->vlans[si->sub.
1711 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
/* Admin up/down callback: record the sw_if_index in the subint config
 * when the interface goes up, reset to ~0 when it goes down. */
1720 static clib_error_t *
1721 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1723 subint_config_t *subint;
1726 clib_error_t *error = 0;
1728 // Find the config for this subinterface
1730 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1735 // not implemented yet or not ethernet
// ~0 marks the subint as down/unmatchable for the input path
1739 subint->sw_if_index =
1740 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1746 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1749 #ifndef CLIB_MARCH_VARIANT
1750 // Set the L2/L3 mode for the subinterface
/* Flags on the subint config control both the L2 bit and which tag-match
 * flags are active; port (non-sub) interfaces get extra match flags. */
1752 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1754 subint_config_t *subint;
1758 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
// a "port" is a main (non-sub) interface
1760 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1762 // Find the config for this subinterface
1764 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1769 // unimplemented or not ethernet
1773 // Double check that the config we found is for our interface (or the interface is down)
1774 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1778 subint->flags |= SUBINT_CONFIG_L2;
1781 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1782 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1786 subint->flags &= ~SUBINT_CONFIG_L2;
1789 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1790 | SUBINT_CONFIG_MATCH_3_TAG);
1798 * Set the L2/L3 mode for the subinterface regardless of port
1801 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1802 u32 sw_if_index, u32 l2)
1804 subint_config_t *subint;
1808 /* Find the config for this subinterface */
1810 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1815 /* unimplemented or not ethernet */
1820 * Double check that the config we found is for our interface (or the
1821 * interface is down)
1823 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
/* Unlike the _set_l2_mode variant, only the L2 bit is toggled here;
 * the tag-match flags are left unchanged. */
1827 subint->flags |= SUBINT_CONFIG_L2;
1831 subint->flags &= ~SUBINT_CONFIG_L2;
/* Subinterface add/del callback: validate the subint config slot.
 * On create, marks the slot VALID with the computed match flags; the
 * config is L3 by default and the interface starts administratively down. */
1839 static clib_error_t *
1840 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1841 u32 sw_if_index, u32 is_create)
1843 clib_error_t *error = 0;
1844 subint_config_t *subint;
1846 u32 unsupported = 0;
1848 // Find the config for this subinterface
1850 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1855 // not implemented yet or not ethernet
1858 // this is the NYI case
1859 error = clib_error_return (0, "not implemented yet");
1870 // Initialize the subint
1871 if (subint->flags & SUBINT_CONFIG_VALID)
1873 // Error vlan already in use
1874 error = clib_error_return (0, "vlan is already in use");
1878 // Note that config is L3 by default
1879 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1880 subint->sw_if_index = ~0; // because interfaces are initially down
1887 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
/* Error counter strings, generated from error.def via X-macro expansion;
 * indices line up with the ETHERNET_ERROR_* enum. */
1889 static char *ethernet_error_strings[] = {
1890 #define ethernet_error(n,c,s) s,
1891 #include "error.def"
1892 #undef ethernet_error
/* Graph-node registration for ethernet-input. scalar_size carries the
 * per-frame ethernet_input_frame_t (single-interface fast-path metadata). */
1896 VLIB_REGISTER_NODE (ethernet_input_node) = {
1897 .name = "ethernet-input",
1898 /* Takes a vector of packets. */
1899 .vector_size = sizeof (u32),
1900 .scalar_size = sizeof (ethernet_input_frame_t),
1901 .n_errors = ETHERNET_N_ERROR,
1902 .error_strings = ethernet_error_strings,
1903 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1905 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1906 foreach_ethernet_input_next
1909 .format_buffer = format_ethernet_header_with_length,
1910 .format_trace = format_ethernet_input_trace,
1911 .unformat_buffer = unformat_ethernet_header,
/* Graph-node registration for ethernet-input-type (same next-node table
 * as ethernet-input so next indices stay aligned). */
1914 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1915 .name = "ethernet-input-type",
1916 /* Takes a vector of packets. */
1917 .vector_size = sizeof (u32),
1918 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1920 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1921 foreach_ethernet_input_next
/* Graph-node registration for ethernet-input-not-l2 (same next-node table
 * as ethernet-input so next indices stay aligned). */
1926 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1927 .name = "ethernet-input-not-l2",
1928 /* Takes a vector of packets. */
1929 .vector_size = sizeof (u32),
1930 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1932 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1933 foreach_ethernet_input_next
1939 #ifndef CLIB_MARCH_VARIANT
/* Toggle rx redirect so all packets from this hw interface enter the
 * ethernet-input node (enable) or resume normal dispatch (disable, ~0). */
1941 ethernet_set_rx_redirect (vnet_main_t * vnm,
1942 vnet_hw_interface_t * hi, u32 enable)
1944 // Ensure all packets go to ethernet-input (i.e. untagged ipv4 packets
1945 // don't go directly to ip4-input)
1946 vnet_hw_interface_rx_redirect_to_node
1947 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1952 * Initialization and registration for the next_by_ethernet structure
/* Builds the sparse ethertype -> next-index vector and pre-marks the
 * DROP/PUNT inverse-mapping slots invalid. */
1956 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1958 l3_next->input_next_by_type = sparse_vec_new
1959 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1960 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1962 vec_validate (l3_next->sparse_index_by_input_next_index,
1963 ETHERNET_INPUT_NEXT_DROP);
1964 vec_validate (l3_next->sparse_index_by_input_next_index,
1965 ETHERNET_INPUT_NEXT_PUNT);
1966 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1967 SPARSE_VEC_INVALID_INDEX;
1968 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1969 SPARSE_VEC_INVALID_INDEX;
1972 * Make sure we don't wipe out an ethernet registration by mistake
1973 * Can happen if init function ordering constraints are missing.
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake.
1977 ethernet_main_t *em = &ethernet_main;
1978 ASSERT (em->next_by_ethertype_register_called == 0);
1984 // Add an ethertype -> next index mapping to the structure
1986 next_by_ethertype_register (next_by_ethertype_t * l3_next,
1987 u32 ethertype, u32 next_index)
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake
// (two occurrences below).
1991 ethernet_main_t *em = &ethernet_main;
1995 ethernet_main_t *em = &ethernet_main;
// flag consulted by next_by_ethertype_init's debug ASSERT
1996 em->next_by_ethertype_register_called = 1;
1999 /* Setup ethernet type -> next index sparse vector mapping. */
2000 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2003 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2005 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
2006 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2008 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2010 // do not allow the cached next index's to be updated if L3
2011 // redirect is enabled, as it will have overwritten them
2012 if (!em->redirect_l3)
2014 // Cache common ethertypes directly
2015 if (ethertype == ETHERNET_TYPE_IP4)
2017 l3_next->input_next_ip4 = next_index;
2019 else if (ethertype == ETHERNET_TYPE_IP6)
2021 l3_next->input_next_ip6 = next_index;
2023 else if (ethertype == ETHERNET_TYPE_MPLS)
2025 l3_next->input_next_mpls = next_index;
/* Init function: set up the three ethernet input nodes, the l3 next
 * table, and the vlan/qinq parsing pools (index 0 of each pool is a
 * reserved "invalid table" sentinel). */
2033 static clib_error_t *
2034 ethernet_input_init (vlib_main_t * vm)
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake.
2035 ethernet_main_t *em = &ethernet_main;
2036 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2037 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2039 ethernet_setup_node (vm, ethernet_input_node.index);
2040 ethernet_setup_node (vm, ethernet_input_type_node.index);
2041 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2043 next_by_ethertype_init (&em->l3_next);
2045 // Initialize pools and vector for vlan parsing
2046 vec_validate (em->main_intfs, 10); // 10 main interfaces
2047 pool_alloc (em->vlan_pool, 10);
2048 pool_alloc (em->qinq_pool, 1);
2050 // The first vlan pool will always be reserved for an invalid table
2051 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2052 // The first qinq pool will always be reserved for an invalid table
2053 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
2058 VLIB_INIT_FUNCTION (ethernet_input_init);
/* Register a node as the handler for an ethertype. Adds the arc on all
 * three ethernet input nodes (next indices must stay aligned) and records
 * the mapping in the l3 next structure. */
2061 ethernet_register_input_type (vlib_main_t * vm,
2062 ethernet_type_t type, u32 node_index)
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake.
2064 ethernet_main_t *em = &ethernet_main;
2065 ethernet_type_info_t *ti;
2069 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2071 clib_error_report (error);
2074 ti = ethernet_get_type_info (em, type);
2075 ti->node_index = node_index;
2076 ti->next_index = vlib_node_add_next (vm,
2077 ethernet_input_node.index, node_index);
2078 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2079 ASSERT (i == ti->next_index);
2081 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2082 ASSERT (i == ti->next_index);
2084 // Add the L3 node for this ethertype to the next nodes structure
2085 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2087 // Call the registration functions for other nodes that want a mapping
2088 l2bvi_register_input_type (vm, type, node_index);
/* Register the node that receives all L2-mode packets; the same arc is
 * added on all three ethernet input nodes to keep next indices aligned. */
2092 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake.
2094 ethernet_main_t *em = &ethernet_main;
2098 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2101 * Even if we never use these arcs, we have to align the next indices...
2103 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2105 ASSERT (i == em->l2_next);
2107 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2108 ASSERT (i == em->l2_next);
2111 // Register a next node for L3 redirect, and enable L3 redirect
2113 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
// FIX(review): restored "&ethernet_main" from "ðernet_main" mojibake.
2115 ethernet_main_t *em = &ethernet_main;
2118 em->redirect_l3 = 1;
2119 em->redirect_l3_next = vlib_node_add_next (vm,
2120 ethernet_input_node.index,
2123 * Change the cached next nodes to the redirect node
// overrides the ip4/ip6/mpls fast-path next indices cached by
// next_by_ethertype_register
2125 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2126 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2127 em->l3_next.input_next_mpls = em->redirect_l3_next;
2130 * Even if we never use these arcs, we have to align the next indices...
2132 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2134 ASSERT (i == em->redirect_l3_next);
2136 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2138 ASSERT (i == em->redirect_l3_next);
2143 * fd.io coding-style-patch-verification: ON
2146 * eval: (c-set-style "gnu")