2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
/* Dispositions (next nodes) reachable from ethernet-input.
 * The x-macro expands into the ETHERNET_INPUT_NEXT_* enum below.
 * NOTE(review): this listing is sampled — the `typedef enum {` opener
 * and some struct members are missing here; restore from the full file. */
48 #define foreach_ethernet_input_next \
49 _ (PUNT, "error-punt") \
50 _ (DROP, "error-drop") \
51 _ (LLC, "llc-input") \
52 _ (IP4_INPUT, "ip4-input") \
53 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
/* Generate ETHERNET_INPUT_NEXT_PUNT, _DROP, _LLC, ... from the list above. */
57 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
58 foreach_ethernet_input_next
60 ETHERNET_INPUT_N_NEXT,
61 } ethernet_input_next_t;
/* Per-packet trace record. frame_data (hw/sw if-index) is only valid when
 * the frame carries ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX — see the trace
 * formatter below. */
67 ethernet_input_frame_t frame_data;
68 } ethernet_input_trace_t;
/* format() callback for ethernet-input traces: prints the frame flags,
 * the hw/sw if-index pair when the frame was tagged as single-interface,
 * then the captured ethernet header on the following line. */
71 format_ethernet_input_trace (u8 * s, va_list * va)
73 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
74 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
75 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
76 u32 indent = format_get_indent (s);
80 s = format (s, "frame: flags 0x%x", t->frame_flags);
/* hw/sw if-index only meaningful when the whole frame shares one interface */
81 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
82 s = format (s, ", hw-if-index %u, sw-if-index %u",
83 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
84 s = format (s, "\n%U", format_white_space, indent);
86 s = format (s, "%U", format_ethernet_header, t->packet_data);
/* Node registration lives later in this file. */
91 extern vlib_node_registration_t ethernet_input_node;
/* Input variants: ETHERNET = normal path starting at the ethernet header;
 * ETHERNET_TYPE = entered after LLC/SNAP processing, buffer points at the
 * 2-byte ethertype only; NOT_L2 = force L3 treatment (is_l2 cleared in
 * ethernet_input_inline). NOTE(review): `typedef enum {` opener missing
 * from this sampled listing. */
95 ETHERNET_INPUT_VARIANT_ETHERNET,
96 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
97 ETHERNET_INPUT_VARIANT_NOT_L2,
98 } ethernet_input_variant_t;
101 // Parse the ethernet header to extract vlan tags and innermost ethertype
// Walks the header in-place, advancing the buffer past the ethernet header
// and each VLAN tag found; outputs host-order innermost *type, *outer_id /
// *inner_id (12-bit VIDs, defaulted to 0 when absent), and *match_flags
// (SUBINT_CONFIG_MATCH_{0..3}_TAG). Also records l2_hdr_offset and the
// vlan count on the buffer. NOTE(review): several parameter and brace
// lines are missing from this sampled listing.
102 static_always_inline void
103 parse_header (ethernet_input_variant_t variant,
107 u16 * outer_id, u16 * inner_id, u32 * match_flags)
111 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
112 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
114 ethernet_header_t *e0;
116 e0 = (void *) (b0->data + b0->current_data);
/* remember where the L2 header starts before advancing past it */
118 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
119 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
121 vlib_buffer_advance (b0, sizeof (e0[0]));
123 *type = clib_net_to_host_u16 (e0->type);
125 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
127 // here when prior node was LLC/SNAP processing
130 e0 = (void *) (b0->data + b0->current_data);
132 vlib_buffer_advance (b0, sizeof (e0[0]));
134 *type = clib_net_to_host_u16 (e0[0]);
137 // save for distinguishing between dot1q and dot1ad later
140 // default the tags to 0 (used if there is no corresponding tag)
144 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
147 // check for vlan encaps
148 if (ethernet_frame_is_tagged (*type))
150 ethernet_vlan_header_t *h0;
153 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
155 h0 = (void *) (b0->data + b0->current_data);
157 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
/* keep only the 12-bit VLAN id; priority/CFI bits discarded */
159 *outer_id = tag & 0xfff;
161 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
163 *type = clib_net_to_host_u16 (h0->type);
165 vlib_buffer_advance (b0, sizeof (h0[0]));
168 if (*type == ETHERNET_TYPE_VLAN)
170 // Double tagged packet
171 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
173 h0 = (void *) (b0->data + b0->current_data);
175 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
177 *inner_id = tag & 0xfff;
179 *type = clib_net_to_host_u16 (h0->type);
181 vlib_buffer_advance (b0, sizeof (h0[0]));
183 if (*type == ETHERNET_TYPE_VLAN)
185 // More than double tagged packet
186 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
188 vlib_buffer_advance (b0, sizeof (h0[0]));
189 vlan_count = 3; // "unknown" number, aka, 3-or-more
/* stash tag depth in buffer metadata for downstream nodes */
193 ethernet_buffer_set_vlan_count (b0, vlan_count);
196 // Determine the subinterface for this packet, given the result of the
197 // vlan table lookups and vlan header parsing. Check the most specific
// Outputs: *new_sw_if_index (the matched subinterface, ~0 on no match),
// *error0 (set to L3_MAC_MISMATCH or DOWN on failure), *is_l2 (subint is
// in an L2 mode). Delegates the actual match to eth_identify_subint().
199 static_always_inline void
200 identify_subint (vnet_hw_interface_t * hi,
203 main_intf_t * main_intf,
204 vlan_intf_t * vlan_intf,
205 qinq_intf_t * qinq_intf,
206 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
210 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
211 qinq_intf, new_sw_if_index, error0, is_l2);
216 // Perform L3 my-mac filter
217 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
218 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
221 ethernet_header_t *e0;
/* re-derive the header pointer from the saved l2_hdr_offset, since the
 * buffer may already have been advanced past the L2 header */
222 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
/* multicast/broadcast dmacs bypass the my-mac filter */
224 if (!(ethernet_address_cast (e0->dst_address)))
226 if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
228 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
233 // Check for down subinterface
234 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
// Choose the next node (*next0) for a parsed packet: drop on error, the
// configured L2 path for L2 subinterfaces (rewinding the buffer so the L2
// header is preserved), direct dispatch for IP4/IP6/MPLS, the L3 redirect
// node when enabled, otherwise the sparse ethertype table / LLC fallback.
// Also records l3_hdr_offset at the current position.
238 static_always_inline void
239 determine_next_node (ethernet_main_t * em,
240 ethernet_input_variant_t variant,
242 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
244 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
245 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
247 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
249 // some error occurred
250 *next0 = ETHERNET_INPUT_NEXT_DROP;
254 // record the L2 len and reset the buffer so the L2 header is preserved
255 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
256 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
257 *next0 = em->l2_next;
258 ASSERT (vnet_buffer (b0)->l2.l2_len ==
259 ethernet_buffer_header_size (b0));
/* rewind to the start of the ethernet header for the L2 path */
260 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
262 // check for common IP/MPLS ethertypes
264 else if (type0 == ETHERNET_TYPE_IP4)
266 *next0 = em->l3_next.input_next_ip4;
268 else if (type0 == ETHERNET_TYPE_IP6)
270 *next0 = em->l3_next.input_next_ip6;
272 else if (type0 == ETHERNET_TYPE_MPLS)
274 *next0 = em->l3_next.input_next_mpls;
277 else if (em->redirect_l3)
279 // L3 Redirect is on, the cached common next nodes will be
280 // pointing to the redirect node, catch the uncommon types here
281 *next0 = em->redirect_l3_next;
285 // uncommon ethertype, check table
287 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
288 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
/* unknown-ethertype error when the sparse lookup missed */
291 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
293 // The table is not populated with LLC values, so check that now.
294 // If variant is variant_ethernet then we came from LLC processing. Don't
295 // go back there; drop instead using by keeping the drop/bad table result.
296 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
298 *next0 = ETHERNET_INPUT_NEXT_LLC;
304 /* following vector code relies on following assumptions */
305 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
308 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
309 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
310 "l3_hdr_offset must follow l2_hdr_offset");
/* Advance 4 buffers past the ethernet header (L3 mode only), set
 * l2/l3_hdr_offset and the *_HDR_OFFSET_VALID flags, and in L2 mode set
 * l2.l2_len. The AVX2 path updates current_data/current_length/flags of
 * all four buffers via gather/scatter of their first 8 metadata bytes;
 * the scalar path does the same field by field. */
312 static_always_inline void
313 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
315 i16 adv = sizeof (ethernet_header_t);
316 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
317 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
319 #ifdef CLIB_HAVE_VEC256
320 /* to reduce number of small loads/stores we are loading first 64 bits
321 of each buffer metadata into 256-bit register so we can advance
322 current_data, current_length and flags.
323 Observed saving of this code is ~2 clocks per packet */
326 /* vector if signed 16 bit integers used in signed vector add operation
327 to advnce current_data and current_length */
328 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
/* per buffer: +adv to current_data, -adv to current_length */
330 adv, -adv, 0, 0, adv, -adv, 0, 0,
331 adv, -adv, 0, 0, adv, -adv, 0, 0
334 /* load 4 x 64 bits */
335 r = u64x4_gather (b[0], b[1], b[2], b[3]);
341 radv = (u64x4) ((i16x16) r + adv4);
343 /* write 4 x 64 bits */
344 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
346 /* use old current_data as l2_hdr_offset and new current_data as
348 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
350 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
351 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
352 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
/* sanity: vector path must match the scalar definition of the offsets */
358 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
359 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
360 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
361 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
363 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
364 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
365 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
366 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
370 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
371 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
372 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
373 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
375 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
376 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
377 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
378 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
/* scalar fallback: set offsets directly from current_data */
382 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
383 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
384 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
385 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
386 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
387 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
388 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
389 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
393 vlib_buffer_advance (b[0], adv);
394 vlib_buffer_advance (b[1], adv);
395 vlib_buffer_advance (b[2], adv);
396 vlib_buffer_advance (b[3], adv);
399 b[0]->flags |= flags;
400 b[1]->flags |= flags;
401 b[2]->flags |= flags;
402 b[3]->flags |= flags;
/* L2 mode: untagged, so l2_len is just the ethernet header size */
407 vnet_buffer (b[0])->l2.l2_len = adv;
408 vnet_buffer (b[1])->l2.l2_len = adv;
409 vnet_buffer (b[2])->l2.l2_len = adv;
410 vnet_buffer (b[3])->l2.l2_len = adv;
/* Single-buffer variant of eth_input_adv_and_flags_x4: set l2/l3 header
 * offsets and validity flags, advance past the ethernet header (L3 mode),
 * and set l2_len (L2 mode). */
414 static_always_inline void
415 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
417 i16 adv = sizeof (ethernet_header_t);
418 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
419 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
421 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
422 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
425 vlib_buffer_advance (b[0], adv);
426 b[0]->flags |= flags;
/* untagged packet: L2 header is exactly one ethernet header */
428 vnet_buffer (b[0])->l2.l2_len = adv;
/* Extract per-packet data for buffer b[offset]: the (network-order)
 * ethertype into etype[offset], the 8 bytes following the ethernet header
 * (potential VLAN tags) into tags[offset], and — when dmac_check is set —
 * the first 8 bytes of the header (dst MAC + 2 src bytes) into
 * dmacs[offset]. SSE path loads type+dmac in one unaligned 128-bit load. */
432 static_always_inline void
433 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
434 u64 * dmacs, int offset, int dmac_check)
436 ethernet_header_t *e;
437 e = vlib_buffer_get_current (b[offset]);
438 #ifdef CLIB_HAVE_VEC128
/* load dmac(6B)+smac start+type in one shot; lane 3 is the ethertype */
439 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
440 etype[offset] = ((u16x8) r)[3];
443 etype[offset] = e->type;
444 tags[offset] = *(u64 *) (e + 1);
448 dmacs[offset] = *(u64 *) e;
/* Map a host-order ethertype to a next-node index: values < 0x600 are
 * 802.3 lengths, not ethertypes, and go to llc-input; everything else is
 * looked up in the sparse per-ethertype next table.
 * NOTE(review): "ðernet_main" is an HTML-entity garble of
 * "&ethernet_main" (&eth;) — restore in the real file. */
451 static_always_inline u16
452 eth_input_next_by_type (u16 etype)
454 ethernet_main_t *em = ðernet_main;
456 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
457 vec_elt (em->l3_next.input_next_by_type,
458 sparse_vec_index (em->l3_next.input_next_by_type, etype));
/* Tail of the per-tag lookup cache struct (opening lines missing from
 * this sampled listing). */
468 u64 n_packets, n_bytes;
469 } eth_input_tag_lookup_t;
/* Flush the cached per-interface RX counters accumulated in a tag-lookup
 * cache entry. No-op when nothing was counted or no interface matched.
 * n_bytes is adjusted by n_packets * l->len because byte counts were
 * accumulated before the L2/tag header length was subtracted — hedged:
 * confirm against the accumulation site in eth_input_tag_lookup. */
471 static_always_inline void
472 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
473 eth_input_tag_lookup_t * l)
475 if (l->n_packets == 0 || l->sw_if_index == ~0)
479 l->n_bytes += l->n_packets * l->len;
481 vlib_increment_combined_counter
482 (vnm->interface_main.combined_sw_if_counters +
483 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
484 l->n_packets, l->n_bytes);
/* Slowpath handler for one tagged packet. Uses a one-entry cache (l):
 * when (tag ^ l->tag) & l->mask is non-zero the cached result is stale and
 * a full dot1q/dot1ad + qinq table lookup is redone; otherwise the cached
 * sw_if_index/next/adv/err are reused. Flushes per-interface counters when
 * the matched sw_if_index changes, applies the DMAC filter for L3 subints,
 * advances the buffer, sets l2_len / l3_hdr_offset / RX sw_if_index, and
 * accumulates packet/byte counts.
 * NOTE(review): "ðernet_main" below is an HTML-entity garble of
 * "&ethernet_main"; several brace/assignment lines are missing from this
 * sampled listing. */
487 static_always_inline void
488 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
489 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
490 u64 tag, u16 * next, vlib_buffer_t * b,
491 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
492 int main_is_l3, int check_dmac)
494 ethernet_main_t *em = ðernet_main;
/* cache miss: tag bits covered by mask differ from the cached tag */
496 if ((tag ^ l->tag) & l->mask)
498 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
501 vlan_table_t *vlan_table;
502 qinq_table_t *qinq_table;
/* view the 8 tag bytes as 4 network-order u16s: [0]=outer tag,
 * [1]=type after outer tag, [2]=inner tag, [3]=type after inner tag */
503 u16 *t = (u16 *) & tag;
504 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
505 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
506 u32 matched, is_l2, new_sw_if_index;
508 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
509 mif->dot1ad_vlans : mif->dot1q_vlans);
510 vif = &vlan_table->vlans[vlan1];
511 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
512 qif = &qinq_table->vlans[vlan2];
513 l->err = ETHERNET_ERROR_NONE;
514 l->type = clib_net_to_host_u16 (t[1]);
/* double-tagged: real ethertype is after the second tag */
516 if (l->type == ETHERNET_TYPE_VLAN)
518 l->type = clib_net_to_host_u16 (t[3]);
520 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
521 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
522 qif, &new_sw_if_index, &l->err,
530 new_sw_if_index = hi->sw_if_index;
531 l->err = ETHERNET_ERROR_NONE;
533 is_l2 = main_is_l3 == 0;
536 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
537 SUBINT_CONFIG_MATCH_1_TAG, mif,
538 vif, qif, &new_sw_if_index,
/* interface changed: flush counters accumulated for the old one */
542 if (l->sw_if_index != new_sw_if_index)
544 eth_input_update_if_counters (vm, vnm, l);
547 l->sw_if_index = new_sw_if_index;
/* mask covers both tags+types for 2-tag matches, only the outer
 * tag+type otherwise (network byte order) */
550 l->mask = (l->n_tags == 2) ?
551 clib_net_to_host_u64 (0xffffffffffffffff) :
552 clib_net_to_host_u64 (0xffffffff00000000);
554 if (matched && l->sw_if_index == ~0)
555 l->err = ETHERNET_ERROR_DOWN;
557 l->len = sizeof (ethernet_header_t) +
558 l->n_tags * sizeof (ethernet_vlan_header_t);
/* L2: rewind to the ethernet header; L3: advance past the tags */
560 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
561 l->n_tags * sizeof (ethernet_vlan_header_t);
563 l->adv = is_l2 ? 0 : l->len;
565 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
566 l->next = ETHERNET_INPUT_NEXT_DROP;
568 l->next = em->l2_next;
569 else if (l->type == ETHERNET_TYPE_IP4)
570 l->next = em->l3_next.input_next_ip4;
571 else if (l->type == ETHERNET_TYPE_IP6)
572 l->next = em->l3_next.input_next_ip6;
573 else if (l->type == ETHERNET_TYPE_MPLS)
574 l->next = em->l3_next.input_next_mpls;
575 else if (em->redirect_l3)
576 l->next = em->redirect_l3_next;
579 l->next = eth_input_next_by_type (l->type);
580 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
581 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
/* l->adv > 0 means the packet goes L3; enforce my-mac filter there */
585 if (check_dmac && l->adv > 0 && dmac_bad)
587 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
588 next[0] = ETHERNET_INPUT_NEXT_PUNT;
593 vlib_buffer_advance (b, l->adv);
594 vnet_buffer (b)->l2.l2_len = l->len;
595 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
597 if (l->err == ETHERNET_ERROR_NONE)
599 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
600 ethernet_buffer_set_vlan_count (b, l->n_tags);
603 b->error = node->errors[l->err];
605 /* update counters */
607 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
/* Bulk my-mac filter: for each packet's first 8 header bytes in dmacs[],
 * mark dmacs_bad[i] non-zero when the 48-bit dmac neither matches the
 * interface hw address nor has the multicast/broadcast I/G bit set.
 * mask keeps the 6 dmac bytes of the 8-byte load; igbit is the I/G bit,
 * both in network byte order. AVX2 path processes 8 packets per
 * iteration via byte-wise compare masks. */
610 static_always_inline void
611 eth_input_process_frame_dmac_check (vnet_hw_interface_t * hi,
612 u64 * dmacs, u8 * dmacs_bad,
615 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
616 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
617 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
619 u8 *dmac_bad = dmacs_bad;
621 i32 n_left = n_packets;
623 #ifdef CLIB_HAVE_VEC256
624 u64x4 igbit4 = u64x4_splat (igbit);
625 u64x4 mask4 = u64x4_splat (mask);
626 u64x4 hwaddr4 = u64x4_splat (hwaddr);
630 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
631 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
/* bad = unicast (I/G clear) AND not our address */
633 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
634 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
/* one result byte per packet, 4 packets per u32 store */
636 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
637 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
/* scalar tail: same predicate, 4 packets unrolled */
654 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
655 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
656 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
657 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
672 /* process frame of buffers, store ethertype into array and update
673 buffer metadata fields depending on interface being l2 or l3 assuming that
674 packets are untagged. For tagged packets those fields are updated later.
675 Optionally store Destionation MAC address and tag data into arrays
676 for further processing */
678 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
679 "VLIB_FRAME_SIZE must be power of 8");
/* Main fast path for a whole frame from a single interface.
 * Pass 1: gather ethertype/tags/dmacs and set buffer metadata (4-wide with
 * double prefetch, then 4-wide, then 1-wide tails).
 * Pass 2: optional bulk DMAC check.
 * Pass 3: assign next nodes — vectorized (AVX2) or scalar classification of
 * IP4/IP6/MPLS (L3 mode) or any-untagged (L2 mode) as fastpath; everything
 * else is queued in slowpath_indices.
 * Pass 4: slowpath — dot1q/dot1ad via cached eth_input_tag_lookup, unknown
 * ethertypes via the sparse table (memoizing the last lookup), DMAC
 * mismatches punted.
 * NOTE(review): "ðernet_main" is a garble of "&ethernet_main"; loop
 * headers/braces are missing from this sampled listing. */
680 static_always_inline void
681 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
682 vnet_hw_interface_t * hi,
683 u32 * buffer_indices, u32 n_packets, int main_is_l3,
684 int ip4_cksum_ok, int dmac_check)
686 ethernet_main_t *em = ðernet_main;
687 u16 nexts[VLIB_FRAME_SIZE], *next;
688 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
689 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
690 u8 dmacs_bad[VLIB_FRAME_SIZE];
691 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
692 u16 slowpath_indices[VLIB_FRAME_SIZE];
694 u16 next_ip4, next_ip6, next_mpls, next_l2;
/* network-order ethertype constants for direct comparison with e->type */
695 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
696 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
697 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
698 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
699 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
700 i32 n_left = n_packets;
701 vlib_buffer_t *b[20];
704 from = buffer_indices;
/* main loop: work on b[0..3], prefetch data of b[8..11] and headers of
 * b[16..19] two iterations ahead */
708 vlib_buffer_t **ph = b + 16, **pd = b + 8;
709 vlib_get_buffers (vm, from, b, 4);
710 vlib_get_buffers (vm, from + 8, pd, 4);
711 vlib_get_buffers (vm, from + 16, ph, 4);
713 vlib_prefetch_buffer_header (ph[0], LOAD);
714 vlib_prefetch_buffer_data (pd[0], LOAD);
715 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
717 vlib_prefetch_buffer_header (ph[1], LOAD);
718 vlib_prefetch_buffer_data (pd[1], LOAD);
719 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
721 vlib_prefetch_buffer_header (ph[2], LOAD);
722 vlib_prefetch_buffer_data (pd[2], LOAD);
723 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
725 vlib_prefetch_buffer_header (ph[3], LOAD);
726 vlib_prefetch_buffer_data (pd[3], LOAD);
727 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
729 eth_input_adv_and_flags_x4 (b, main_is_l3);
/* 4-wide tail without prefetch */
740 vlib_get_buffers (vm, from, b, 4);
741 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
742 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
743 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
744 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
745 eth_input_adv_and_flags_x4 (b, main_is_l3);
/* single-packet tail */
756 vlib_get_buffers (vm, from, b, 1);
757 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
758 eth_input_adv_and_flags_x1 (b, main_is_l3);
769 eth_input_process_frame_dmac_check (hi, dmacs, dmacs_bad, n_packets);
771 next_ip4 = em->l3_next.input_next_ip4;
772 next_ip6 = em->l3_next.input_next_ip6;
773 next_mpls = em->l3_next.input_next_mpls;
774 next_l2 = em->l2_next;
/* skip the sw checksum node when hw already validated ip4 checksums */
776 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
777 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
779 #ifdef CLIB_HAVE_VEC256
780 u16x16 et16_ip4 = u16x16_splat (et_ip4);
781 u16x16 et16_ip6 = u16x16_splat (et_ip6);
782 u16x16 et16_mpls = u16x16_splat (et_mpls);
783 u16x16 et16_vlan = u16x16_splat (et_vlan);
784 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
785 u16x16 next16_ip4 = u16x16_splat (next_ip4);
786 u16x16 next16_ip6 = u16x16_splat (next_ip6);
787 u16x16 next16_mpls = u16x16_splat (next_mpls);
788 u16x16 next16_l2 = u16x16_splat (next_l2);
790 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
799 /* fastpath - in l3 mode hadles ip4, ip6 and mpls packets, other packets
800 are considered as slowpath, in l2 mode all untagged packets are
801 considered as fastpath */
804 #ifdef CLIB_HAVE_VEC256
/* classify 16 ethertypes at once; r holds next index or 0 (slowpath) */
808 u16x16 e16 = u16x16_load_unaligned (etype);
811 r += (e16 == et16_ip4) & next16_ip4;
812 r += (e16 == et16_ip6) & next16_ip6;
813 r += (e16 == et16_mpls) & next16_mpls;
/* l2 mode: any non-vlan, non-dot1ad packet is fastpath */
816 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
817 u16x16_store_unaligned (r, next);
818 /* (review) any zero lane means at least one slowpath packet */
819 if (!u16x16_is_all_zero (r == zero))
821 if (u16x16_is_all_zero (r))
/* all 16 are slowpath: append i..i+15 in one vector store */
823 u16x16_store_unaligned (u16x16_splat (i) + stairs,
824 slowpath_indices + n_slowpath);
829 for (int j = 0; j < 16; j++)
831 slowpath_indices[n_slowpath++] = i + j;
/* scalar classification fallback */
842 if (main_is_l3 && etype[0] == et_ip4)
844 else if (main_is_l3 && etype[0] == et_ip6)
846 else if (main_is_l3 && etype[0] == et_mpls)
848 else if (main_is_l3 == 0 &&
849 etype[0] != et_vlan && etype[0] != et_dot1ad)
854 slowpath_indices[n_slowpath++] = i;
/* slowpath: tagged packets + unknown ethertypes */
865 vnet_main_t *vnm = vnet_get_main ();
867 u16 *si = slowpath_indices;
868 u32 last_unknown_etype = ~0;
869 u32 last_unknown_next = ~0;
/* seed the lookup caches so the first packet always misses */
870 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
872 .tag = tags[si[0]] ^ -1LL,
876 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
881 u16 etype = etypes[i];
883 if (etype == et_vlan)
885 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
886 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
887 &dot1q_lookup, dmacs_bad[i], 0,
888 main_is_l3, dmac_check);
891 else if (etype == et_dot1ad)
893 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
894 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
895 &dot1ad_lookup, dmacs_bad[i], 1,
896 main_is_l3, dmac_check);
900 /* untagged packet with not well known etyertype */
/* memoize the last sparse-table lookup across consecutive packets */
901 if (last_unknown_etype != etype)
903 last_unknown_etype = etype;
904 etype = clib_host_to_net_u16 (etype);
905 last_unknown_next = eth_input_next_by_type (etype);
907 if (dmac_check && main_is_l3 && dmacs_bad[i])
909 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
910 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
911 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
914 nexts[i] = last_unknown_next;
/* flush counters accumulated by the tag-lookup caches */
922 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
923 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
926 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
/* Dispatch a frame known to come from one interface: decide whether the
 * main interface is L2 or L3 (from the untagged subint config) and whether
 * a DMAC check is required (promisc / subinterfaces), then call
 * eth_input_process_frame with the matching compile-time flags.
 * NOTE(review): "ðernet_main" is a garble of "&ethernet_main"; the
 * if/else lines selecting between the three calls are missing from this
 * sampled listing. */
929 static_always_inline void
930 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
931 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
934 ethernet_main_t *em = ðernet_main;
935 ethernet_interface_t *ei;
936 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
937 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
938 subint_config_t *subint0 = &intf0->untagged_subint;
940 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
941 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
945 /* main interface is L3, we dont expect tagged packets and interface
946 is not in promisc node, so we dont't need to check DMAC */
950 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
953 /* subinterfaces and promisc mode so DMAC check is needed */
954 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
960 /* untagged packets are treated as L2 */
962 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
/* Per-frame tracing helper: when node tracing is on, copy each traced
 * buffer's packet bytes, the frame flags, and the frame scalar args
 * (ethernet_input_frame_t) into a trace record; independently, when RX
 * pcap capture is enabled, append matching buffers to the global pcap
 * capture (all interfaces when pcap_sw_if_index == 0, else only the
 * configured one). */
968 static_always_inline void
969 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
970 vlib_frame_t * from_frame)
973 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
975 from = vlib_frame_vector_args (from_frame);
976 n_left = from_frame->n_vectors;
980 ethernet_input_trace_t *t0;
981 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
983 if (b0->flags & VLIB_BUFFER_IS_TRACED)
985 t0 = vlib_add_trace (vm, node, b0,
986 sizeof (ethernet_input_trace_t));
987 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
988 sizeof (t0->packet_data));
989 t0->frame_flags = from_frame->flags;
990 clib_memcpy_fast (&t0->frame_data,
991 vlib_frame_scalar_args (from_frame),
992 sizeof (ethernet_input_frame_t));
999 /* rx pcap capture if enabled */
1000 if (PREDICT_FALSE (vlib_global_main.pcap[VLIB_RX].pcap_enable))
1004 from = vlib_frame_vector_args (from_frame);
1005 n_left = from_frame->n_vectors;
1011 b0 = vlib_get_buffer (vm, bi0);
/* sw_if_index 0 acts as a wildcard: capture from all interfaces */
1013 if (vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index == 0 ||
1014 vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index
1015 == vnet_buffer (b0)->sw_if_index[VLIB_RX])
1017 pcap_add_buffer (&vlib_global_main.pcap[VLIB_RX].pcap_main, vm,
1025 static_always_inline void
1026 ethernet_input_inline (vlib_main_t * vm,
1027 vlib_node_runtime_t * node,
1028 u32 * from, u32 n_packets,
1029 ethernet_input_variant_t variant)
1031 vnet_main_t *vnm = vnet_get_main ();
1032 ethernet_main_t *em = ðernet_main;
1033 vlib_node_runtime_t *error_node;
1034 u32 n_left_from, next_index, *to_next;
1035 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1036 u32 thread_index = vm->thread_index;
1037 u32 cached_sw_if_index = ~0;
1038 u32 cached_is_l2 = 0; /* shut up gcc */
1039 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1040 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
1041 vlib_buffer_t **b = bufs;
1043 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1044 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1048 n_left_from = n_packets;
1050 next_index = node->cached_next_index;
1051 stats_sw_if_index = node->runtime_data[0];
1052 stats_n_packets = stats_n_bytes = 0;
1053 vlib_get_buffers (vm, from, bufs, n_left_from);
1055 while (n_left_from > 0)
1059 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1061 while (n_left_from >= 4 && n_left_to_next >= 2)
1064 vlib_buffer_t *b0, *b1;
1065 u8 next0, next1, error0, error1;
1066 u16 type0, orig_type0, type1, orig_type1;
1067 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1068 u32 match_flags0, match_flags1;
1069 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1070 new_sw_if_index1, len1;
1071 vnet_hw_interface_t *hi0, *hi1;
1072 main_intf_t *main_intf0, *main_intf1;
1073 vlan_intf_t *vlan_intf0, *vlan_intf1;
1074 qinq_intf_t *qinq_intf0, *qinq_intf1;
1076 ethernet_header_t *e0, *e1;
1078 /* Prefetch next iteration. */
1080 vlib_prefetch_buffer_header (b[2], STORE);
1081 vlib_prefetch_buffer_header (b[3], STORE);
1083 CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
1084 CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
1093 n_left_to_next -= 2;
1100 error0 = error1 = ETHERNET_ERROR_NONE;
1101 e0 = vlib_buffer_get_current (b0);
1102 type0 = clib_net_to_host_u16 (e0->type);
1103 e1 = vlib_buffer_get_current (b1);
1104 type1 = clib_net_to_host_u16 (e1->type);
1106 /* Set the L2 header offset for all packets */
1107 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1108 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1109 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1110 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1112 /* Speed-path for the untagged case */
1113 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1114 && !ethernet_frame_is_any_tagged_x2 (type0,
1118 subint_config_t *subint0;
1119 u32 sw_if_index0, sw_if_index1;
1121 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1122 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1123 is_l20 = cached_is_l2;
1125 /* This is probably wholly unnecessary */
1126 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1129 /* Now sw_if_index0 == sw_if_index1 */
1130 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1132 cached_sw_if_index = sw_if_index0;
1133 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1134 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1135 subint0 = &intf0->untagged_subint;
1136 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1139 if (PREDICT_TRUE (is_l20 != 0))
1141 vnet_buffer (b0)->l3_hdr_offset =
1142 vnet_buffer (b0)->l2_hdr_offset +
1143 sizeof (ethernet_header_t);
1144 vnet_buffer (b1)->l3_hdr_offset =
1145 vnet_buffer (b1)->l2_hdr_offset +
1146 sizeof (ethernet_header_t);
1147 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1148 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1149 next0 = em->l2_next;
1150 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1151 next1 = em->l2_next;
1152 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1156 if (!ethernet_address_cast (e0->dst_address) &&
1157 (hi->hw_address != 0) &&
1158 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1159 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1160 if (!ethernet_address_cast (e1->dst_address) &&
1161 (hi->hw_address != 0) &&
1162 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1163 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1164 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1165 determine_next_node (em, variant, 0, type0, b0,
1167 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1168 determine_next_node (em, variant, 0, type1, b1,
1174 /* Slow-path for the tagged case */
1176 parse_header (variant,
1179 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1181 parse_header (variant,
1184 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1186 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1187 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1189 eth_vlan_table_lookups (em,
1196 &main_intf0, &vlan_intf0, &qinq_intf0);
1198 eth_vlan_table_lookups (em,
1205 &main_intf1, &vlan_intf1, &qinq_intf1);
1207 identify_subint (hi0,
1212 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1214 identify_subint (hi1,
1219 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1221 // Save RX sw_if_index for later nodes
1222 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1224 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1225 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1227 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1229 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1230 if (((new_sw_if_index0 != ~0)
1231 && (new_sw_if_index0 != old_sw_if_index0))
1232 || ((new_sw_if_index1 != ~0)
1233 && (new_sw_if_index1 != old_sw_if_index1)))
1236 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1237 - vnet_buffer (b0)->l2_hdr_offset;
1238 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1239 - vnet_buffer (b1)->l2_hdr_offset;
1241 stats_n_packets += 2;
1242 stats_n_bytes += len0 + len1;
1245 (!(new_sw_if_index0 == stats_sw_if_index
1246 && new_sw_if_index1 == stats_sw_if_index)))
1248 stats_n_packets -= 2;
1249 stats_n_bytes -= len0 + len1;
1251 if (new_sw_if_index0 != old_sw_if_index0
1252 && new_sw_if_index0 != ~0)
1253 vlib_increment_combined_counter (vnm->
1254 interface_main.combined_sw_if_counters
1256 VNET_INTERFACE_COUNTER_RX,
1258 new_sw_if_index0, 1,
1260 if (new_sw_if_index1 != old_sw_if_index1
1261 && new_sw_if_index1 != ~0)
1262 vlib_increment_combined_counter (vnm->
1263 interface_main.combined_sw_if_counters
1265 VNET_INTERFACE_COUNTER_RX,
1267 new_sw_if_index1, 1,
1270 if (new_sw_if_index0 == new_sw_if_index1)
1272 if (stats_n_packets > 0)
1274 vlib_increment_combined_counter
1275 (vnm->interface_main.combined_sw_if_counters
1276 + VNET_INTERFACE_COUNTER_RX,
1279 stats_n_packets, stats_n_bytes);
1280 stats_n_packets = stats_n_bytes = 0;
1282 stats_sw_if_index = new_sw_if_index0;
1287 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1288 is_l20 = is_l21 = 0;
1290 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1292 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1296 b0->error = error_node->errors[error0];
1297 b1->error = error_node->errors[error1];
1299 // verify speculative enqueue
1300 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1301 n_left_to_next, bi0, bi1, next0,
1305 while (n_left_from > 0 && n_left_to_next > 0)
1310 u16 type0, orig_type0;
1311 u16 outer_id0, inner_id0;
1313 u32 old_sw_if_index0, new_sw_if_index0, len0;
1314 vnet_hw_interface_t *hi0;
1315 main_intf_t *main_intf0;
1316 vlan_intf_t *vlan_intf0;
1317 qinq_intf_t *qinq_intf0;
1318 ethernet_header_t *e0;
1321 // Prefetch next iteration
1322 if (n_left_from > 1)
1324 vlib_prefetch_buffer_header (b[1], STORE);
1325 CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1333 n_left_to_next -= 1;
1338 error0 = ETHERNET_ERROR_NONE;
1339 e0 = vlib_buffer_get_current (b0);
1340 type0 = clib_net_to_host_u16 (e0->type);
1342 /* Set the L2 header offset for all packets */
1343 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1344 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1346 /* Speed-path for the untagged case */
1347 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1348 && !ethernet_frame_is_tagged (type0)))
1351 subint_config_t *subint0;
1354 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1355 is_l20 = cached_is_l2;
1357 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1359 cached_sw_if_index = sw_if_index0;
1360 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1361 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1362 subint0 = &intf0->untagged_subint;
1363 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1367 if (PREDICT_TRUE (is_l20 != 0))
1369 vnet_buffer (b0)->l3_hdr_offset =
1370 vnet_buffer (b0)->l2_hdr_offset +
1371 sizeof (ethernet_header_t);
1372 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1373 next0 = em->l2_next;
1374 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1378 if (!ethernet_address_cast (e0->dst_address) &&
1379 (hi->hw_address != 0) &&
1380 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1381 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1382 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1383 determine_next_node (em, variant, 0, type0, b0,
1389 /* Slow-path for the tagged case */
1390 parse_header (variant,
1393 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1395 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1397 eth_vlan_table_lookups (em,
1404 &main_intf0, &vlan_intf0, &qinq_intf0);
1406 identify_subint (hi0,
1411 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1413 // Save RX sw_if_index for later nodes
1414 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1416 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1418 // Increment subinterface stats
1419 // Note that interface-level counters have already been incremented
1420 // prior to calling this function. Thus only subinterface counters
1421 // are incremented here.
1423 // Interface level counters include packets received on the main
1424 // interface and all subinterfaces. Subinterface level counters
1425 // include only those packets received on that subinterface
1426 // Increment stats if the subint is valid and it is not the main intf
1427 if ((new_sw_if_index0 != ~0)
1428 && (new_sw_if_index0 != old_sw_if_index0))
1431 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1432 - vnet_buffer (b0)->l2_hdr_offset;
1434 stats_n_packets += 1;
1435 stats_n_bytes += len0;
1437 // Batch stat increments from the same subinterface so counters
1438 // don't need to be incremented for every packet.
1439 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1441 stats_n_packets -= 1;
1442 stats_n_bytes -= len0;
1444 if (new_sw_if_index0 != ~0)
1445 vlib_increment_combined_counter
1446 (vnm->interface_main.combined_sw_if_counters
1447 + VNET_INTERFACE_COUNTER_RX,
1448 thread_index, new_sw_if_index0, 1, len0);
1449 if (stats_n_packets > 0)
1451 vlib_increment_combined_counter
1452 (vnm->interface_main.combined_sw_if_counters
1453 + VNET_INTERFACE_COUNTER_RX,
1455 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1456 stats_n_packets = stats_n_bytes = 0;
1458 stats_sw_if_index = new_sw_if_index0;
1462 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1465 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1469 b0->error = error_node->errors[error0];
1471 // verify speculative enqueue
1472 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1473 to_next, n_left_to_next,
1477 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1480 // Increment any remaining batched stats
1481 if (stats_n_packets > 0)
1483 vlib_increment_combined_counter
1484 (vnm->interface_main.combined_sw_if_counters
1485 + VNET_INTERFACE_COUNTER_RX,
1486 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1487 node->runtime_data[0] = stats_sw_if_index;
1491 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1492 vlib_node_runtime_t * node,
1493 vlib_frame_t * frame)
1495 vnet_main_t *vnm = vnet_get_main ();
1496 u32 *from = vlib_frame_vector_args (frame);
1497 u32 n_packets = frame->n_vectors;
1499 ethernet_input_trace (vm, node, frame);
1501 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
1503 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1504 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1505 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1506 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
1509 ethernet_input_inline (vm, node, from, n_packets,
1510 ETHERNET_INPUT_VARIANT_ETHERNET);
1514 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1515 vlib_node_runtime_t * node,
1516 vlib_frame_t * from_frame)
1518 u32 *from = vlib_frame_vector_args (from_frame);
1519 u32 n_packets = from_frame->n_vectors;
1520 ethernet_input_trace (vm, node, from_frame);
1521 ethernet_input_inline (vm, node, from, n_packets,
1522 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
1526 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1527 vlib_node_runtime_t * node,
1528 vlib_frame_t * from_frame)
1530 u32 *from = vlib_frame_vector_args (from_frame);
1531 u32 n_packets = from_frame->n_vectors;
1532 ethernet_input_trace (vm, node, from_frame);
1533 ethernet_input_inline (vm, node, from, n_packets,
1534 ETHERNET_INPUT_VARIANT_NOT_L2);
1539 // Return the subinterface config struct for the given sw_if_index
1540 // Also return via parameter the appropriate match flags for the
1541 // configured number of tags.
1542 // On error (unsupported or not ethernet) return 0.
1543 static subint_config_t *
1544 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1546 u32 * flags, u32 * unsupported)
1548 ethernet_main_t *em = ðernet_main;
1549 vnet_hw_interface_t *hi;
1550 vnet_sw_interface_t *si;
1551 main_intf_t *main_intf;
1552 vlan_table_t *vlan_table;
1553 qinq_table_t *qinq_table;
1554 subint_config_t *subint = 0;
1556 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1558 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1561 goto done; // non-ethernet interface
1564 // ensure there's an entry for the main intf (shouldn't really be necessary)
1565 vec_validate (em->main_intfs, hi->hw_if_index);
1566 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1568 // Locate the subint for the given ethernet config
1569 si = vnet_get_sw_interface (vnm, sw_if_index);
1571 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1573 p2p_ethernet_main_t *p2pm = &p2p_main;
1574 u32 p2pe_sw_if_index =
1575 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1576 if (p2pe_sw_if_index == ~0)
1578 pool_get (p2pm->p2p_subif_pool, subint);
1579 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1582 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1583 *flags = SUBINT_CONFIG_P2P;
1585 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1589 pipe = pipe_get (sw_if_index);
1590 subint = &pipe->subint;
1591 *flags = SUBINT_CONFIG_P2P;
1593 else if (si->sub.eth.flags.default_sub)
1595 subint = &main_intf->default_subint;
1596 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1597 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1599 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1601 // if no flags are set then this is a main interface
1602 // so treat as untagged
1603 subint = &main_intf->untagged_subint;
1604 *flags = SUBINT_CONFIG_MATCH_0_TAG;
1609 // first get the vlan table
1610 if (si->sub.eth.flags.dot1ad)
1612 if (main_intf->dot1ad_vlans == 0)
1614 // Allocate a vlan table from the pool
1615 pool_get (em->vlan_pool, vlan_table);
1616 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1620 // Get ptr to existing vlan table
1622 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1627 if (main_intf->dot1q_vlans == 0)
1629 // Allocate a vlan table from the pool
1630 pool_get (em->vlan_pool, vlan_table);
1631 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1635 // Get ptr to existing vlan table
1637 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
1641 if (si->sub.eth.flags.one_tag)
1643 *flags = si->sub.eth.flags.exact_match ?
1644 SUBINT_CONFIG_MATCH_1_TAG :
1645 (SUBINT_CONFIG_MATCH_1_TAG |
1646 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1648 if (si->sub.eth.flags.outer_vlan_id_any)
1650 // not implemented yet
1656 // a single vlan, a common case
1658 &vlan_table->vlans[si->sub.eth.
1659 outer_vlan_id].single_tag_subint;
1666 *flags = si->sub.eth.flags.exact_match ?
1667 SUBINT_CONFIG_MATCH_2_TAG :
1668 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1670 if (si->sub.eth.flags.outer_vlan_id_any
1671 && si->sub.eth.flags.inner_vlan_id_any)
1673 // not implemented yet
1678 if (si->sub.eth.flags.inner_vlan_id_any)
1680 // a specific outer and "any" inner
1681 // don't need a qinq table for this
1683 &vlan_table->vlans[si->sub.eth.
1684 outer_vlan_id].inner_any_subint;
1685 if (si->sub.eth.flags.exact_match)
1687 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1691 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1692 SUBINT_CONFIG_MATCH_3_TAG;
1697 // a specific outer + specifc innner vlan id, a common case
1699 // get the qinq table
1700 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1702 // Allocate a qinq table from the pool
1703 pool_get (em->qinq_pool, qinq_table);
1704 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1705 qinq_table - em->qinq_pool;
1709 // Get ptr to existing qinq table
1711 vec_elt_at_index (em->qinq_pool,
1712 vlan_table->vlans[si->sub.
1716 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
1725 static clib_error_t *
1726 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1728 subint_config_t *subint;
1731 clib_error_t *error = 0;
1733 // Find the config for this subinterface
1735 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1740 // not implemented yet or not ethernet
1744 subint->sw_if_index =
1745 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1751 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1754 #ifndef CLIB_MARCH_VARIANT
1755 // Set the L2/L3 mode for the subinterface
1757 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1759 subint_config_t *subint;
1763 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
1765 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1767 // Find the config for this subinterface
1769 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1774 // unimplemented or not ethernet
1778 // Double check that the config we found is for our interface (or the interface is down)
1779 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1783 subint->flags |= SUBINT_CONFIG_L2;
1786 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1787 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1791 subint->flags &= ~SUBINT_CONFIG_L2;
1794 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1795 | SUBINT_CONFIG_MATCH_3_TAG);
1803 * Set the L2/L3 mode for the subinterface regardless of port
1806 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1807 u32 sw_if_index, u32 l2)
1809 subint_config_t *subint;
1813 /* Find the config for this subinterface */
1815 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1820 /* unimplemented or not ethernet */
1825 * Double check that the config we found is for our interface (or the
1826 * interface is down)
1828 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1832 subint->flags |= SUBINT_CONFIG_L2;
1836 subint->flags &= ~SUBINT_CONFIG_L2;
1844 static clib_error_t *
1845 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1846 u32 sw_if_index, u32 is_create)
1848 clib_error_t *error = 0;
1849 subint_config_t *subint;
1851 u32 unsupported = 0;
1853 // Find the config for this subinterface
1855 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1860 // not implemented yet or not ethernet
1863 // this is the NYI case
1864 error = clib_error_return (0, "not implemented yet");
1875 // Initialize the subint
1876 if (subint->flags & SUBINT_CONFIG_VALID)
1878 // Error vlan already in use
1879 error = clib_error_return (0, "vlan is already in use");
1883 // Note that config is L3 by default
1884 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1885 subint->sw_if_index = ~0; // because interfaces are initially down
1892 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
1894 static char *ethernet_error_strings[] = {
1895 #define ethernet_error(n,c,s) s,
1896 #include "error.def"
1897 #undef ethernet_error
1901 VLIB_REGISTER_NODE (ethernet_input_node) = {
1902 .name = "ethernet-input",
1903 /* Takes a vector of packets. */
1904 .vector_size = sizeof (u32),
1905 .scalar_size = sizeof (ethernet_input_frame_t),
1906 .n_errors = ETHERNET_N_ERROR,
1907 .error_strings = ethernet_error_strings,
1908 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1910 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1911 foreach_ethernet_input_next
1914 .format_buffer = format_ethernet_header_with_length,
1915 .format_trace = format_ethernet_input_trace,
1916 .unformat_buffer = unformat_ethernet_header,
1919 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1920 .name = "ethernet-input-type",
1921 /* Takes a vector of packets. */
1922 .vector_size = sizeof (u32),
1923 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1925 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1926 foreach_ethernet_input_next
1931 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1932 .name = "ethernet-input-not-l2",
1933 /* Takes a vector of packets. */
1934 .vector_size = sizeof (u32),
1935 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1937 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1938 foreach_ethernet_input_next
1944 #ifndef CLIB_MARCH_VARIANT
1946 ethernet_set_rx_redirect (vnet_main_t * vnm,
1947 vnet_hw_interface_t * hi, u32 enable)
1949 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1950 // don't go directly to ip4-input)
1951 vnet_hw_interface_rx_redirect_to_node
1952 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1957 * Initialization and registration for the next_by_ethernet structure
1961 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1963 l3_next->input_next_by_type = sparse_vec_new
1964 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1965 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1967 vec_validate (l3_next->sparse_index_by_input_next_index,
1968 ETHERNET_INPUT_NEXT_DROP);
1969 vec_validate (l3_next->sparse_index_by_input_next_index,
1970 ETHERNET_INPUT_NEXT_PUNT);
1971 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1972 SPARSE_VEC_INVALID_INDEX;
1973 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1974 SPARSE_VEC_INVALID_INDEX;
1977 * Make sure we don't wipe out an ethernet registration by mistake
1978 * Can happen if init function ordering constraints are missing.
1982 ethernet_main_t *em = ðernet_main;
1983 ASSERT (em->next_by_ethertype_register_called == 0);
1989 // Add an ethertype -> next index mapping to the structure
1991 next_by_ethertype_register (next_by_ethertype_t * l3_next,
1992 u32 ethertype, u32 next_index)
1996 ethernet_main_t *em = ðernet_main;
2000 ethernet_main_t *em = ðernet_main;
2001 em->next_by_ethertype_register_called = 1;
2004 /* Setup ethernet type -> next index sparse vector mapping. */
2005 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2008 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2010 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
2011 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2013 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2015 // do not allow the cached next index's to be updated if L3
2016 // redirect is enabled, as it will have overwritten them
2017 if (!em->redirect_l3)
2019 // Cache common ethertypes directly
2020 if (ethertype == ETHERNET_TYPE_IP4)
2022 l3_next->input_next_ip4 = next_index;
2024 else if (ethertype == ETHERNET_TYPE_IP6)
2026 l3_next->input_next_ip6 = next_index;
2028 else if (ethertype == ETHERNET_TYPE_MPLS)
2030 l3_next->input_next_mpls = next_index;
2037 ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2039 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2040 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2042 ethernet_setup_node (vm, ethernet_input_node.index);
2043 ethernet_setup_node (vm, ethernet_input_type_node.index);
2044 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2046 next_by_ethertype_init (&em->l3_next);
2048 // Initialize pools and vector for vlan parsing
2049 vec_validate (em->main_intfs, 10); // 10 main interfaces
2050 pool_alloc (em->vlan_pool, 10);
2051 pool_alloc (em->qinq_pool, 1);
2053 // The first vlan pool will always be reserved for an invalid table
2054 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2055 // The first qinq pool will always be reserved for an invalid table
2056 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
2060 ethernet_register_input_type (vlib_main_t * vm,
2061 ethernet_type_t type, u32 node_index)
2063 ethernet_main_t *em = ðernet_main;
2064 ethernet_type_info_t *ti;
2068 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2070 clib_error_report (error);
2073 ti = ethernet_get_type_info (em, type);
2076 clib_warning ("type_info NULL for type %d", type);
2079 ti->node_index = node_index;
2080 ti->next_index = vlib_node_add_next (vm,
2081 ethernet_input_node.index, node_index);
2082 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2083 ASSERT (i == ti->next_index);
2085 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2086 ASSERT (i == ti->next_index);
2088 // Add the L3 node for this ethertype to the next nodes structure
2089 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2091 // Call the registration functions for other nodes that want a mapping
2092 l2bvi_register_input_type (vm, type, node_index);
2096 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
2098 ethernet_main_t *em = ðernet_main;
2102 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2105 * Even if we never use these arcs, we have to align the next indices...
2107 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2109 ASSERT (i == em->l2_next);
2111 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2112 ASSERT (i == em->l2_next);
2115 // Register a next node for L3 redirect, and enable L3 redirect
2117 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
2119 ethernet_main_t *em = ðernet_main;
2122 em->redirect_l3 = 1;
2123 em->redirect_l3_next = vlib_node_add_next (vm,
2124 ethernet_input_node.index,
2127 * Change the cached next nodes to the redirect node
2129 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2130 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2131 em->l3_next.input_next_mpls = em->redirect_l3_next;
2134 * Even if we never use these arcs, we have to align the next indices...
2136 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2138 ASSERT (i == em->redirect_l3_next);
2140 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2142 ASSERT (i == em->redirect_l3_next);
2147 * fd.io coding-style-patch-verification: ON
2150 * eval: (c-set-style "gnu")