2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
47 #include <vnet/classify/trace_classify.h>
/* Table of ethernet-input next nodes: symbolic name -> graph-node name.
   Expanded below into the ETHERNET_INPUT_NEXT_* enum; the graph-node name
   strings are used elsewhere when registering the node.
   NOTE(review): this excerpt has original line numbers baked into each line
   and is missing blank/brace-only lines (e.g. the "typedef enum {" opener). */
49 #define foreach_ethernet_input_next \
50 _ (PUNT, "error-punt") \
51 _ (DROP, "error-drop") \
52 _ (LLC, "llc-input") \
53 _ (IP4_INPUT, "ip4-input") \
54 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
/* Expand the table into enum constants ETHERNET_INPUT_NEXT_PUNT, ... */
58 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
59 foreach_ethernet_input_next
61 ETHERNET_INPUT_N_NEXT,
62 } ethernet_input_next_t;
/* Per-packet trace record for ethernet-input.
   NOTE(review): the struct opener and leading members are elided from this
   excerpt; format_ethernet_input_trace below also reads t->frame_flags and
   t->packet_data, so those members presumably precede frame_data — confirm
   against the full file. */
68 ethernet_input_frame_t frame_data;
69 } ethernet_input_trace_t;
/* Format callback for ethernet-input trace records: prints frame flags,
   the hw/sw if-index pair when the whole frame came from a single
   interface, then the parsed ethernet header.
   NOTE(review): the return-type line, braces and trailing "return s;" are
   elided from this excerpt. */
72 format_ethernet_input_trace (u8 * s, va_list * va)
74 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
75 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
76 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
77 u32 indent = format_get_indent (s);
81 s = format (s, "frame: flags 0x%x", t->frame_flags);
/* hw/sw if-index only meaningful when the frame is single-interface */
82 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
83 s = format (s, ", hw-if-index %u, sw-if-index %u",
84 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
85 s = format (s, "\n%U", format_white_space, indent);
87 s = format (s, "%U", format_ethernet_header, t->packet_data);
92 extern vlib_node_registration_t ethernet_input_node;
/* Processing variants for the shared ethernet input path:
   - ETHERNET: normal ethernet-input node,
   - ETHERNET_TYPE: entered after LLC/SNAP processing (see parse_header),
   - NOT_L2: input on a non-L2 (L3) main interface. */
96 ETHERNET_INPUT_VARIANT_ETHERNET,
97 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
98 ETHERNET_INPUT_VARIANT_NOT_L2,
99 } ethernet_input_variant_t;
102 // Parse the ethernet header to extract vlan tags and innermost ethertype
// Outputs (via pointers): innermost ethertype, outer/inner vlan ids, and
// SUBINT_CONFIG_* match flags describing how many tags were seen.  Also
// advances b0 past the headers it consumed and records l2_hdr_offset.
// NOTE(review): several parameter lines (b0, type, ...) and brace lines are
// elided from this excerpt.
103 static_always_inline void
104 parse_header (ethernet_input_variant_t variant,
108 u16 * outer_id, u16 * inner_id, u32 * match_flags)
112 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
113 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
115 ethernet_header_t *e0;
117 e0 = (void *) (b0->data + b0->current_data);
// remember where the ethernet header started before advancing
119 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
120 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
122 vlib_buffer_advance (b0, sizeof (e0[0]));
124 *type = clib_net_to_host_u16 (e0->type);
126 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
128 // here when prior node was LLC/SNAP processing
131 e0 = (void *) (b0->data + b0->current_data);
133 vlib_buffer_advance (b0, sizeof (e0[0]));
135 *type = clib_net_to_host_u16 (e0[0]);
138 // save for distinguishing between dot1q and dot1ad later
141 // default the tags to 0 (used if there is no corresponding tag)
// start by assuming an untagged packet
145 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
148 // check for vlan encaps
149 if (ethernet_frame_is_tagged (*type))
151 ethernet_vlan_header_t *h0;
154 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
156 h0 = (void *) (b0->data + b0->current_data);
158 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
// vlan id is the low 12 bits of the TCI
160 *outer_id = tag & 0xfff;
162 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
164 *type = clib_net_to_host_u16 (h0->type);
166 vlib_buffer_advance (b0, sizeof (h0[0]));
169 if (*type == ETHERNET_TYPE_VLAN)
171 // Double tagged packet
172 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
174 h0 = (void *) (b0->data + b0->current_data);
176 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
178 *inner_id = tag & 0xfff;
180 *type = clib_net_to_host_u16 (h0->type);
182 vlib_buffer_advance (b0, sizeof (h0[0]));
184 if (*type == ETHERNET_TYPE_VLAN)
186 // More than double tagged packet
187 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
189 vlib_buffer_advance (b0, sizeof (h0[0]));
190 vlan_count = 3; // "unknown" number, aka, 3-or-more
// record how many vlan tags this buffer carries (0, 1, 2, or 3+)
194 ethernet_buffer_set_vlan_count (b0, vlan_count);
197 // Determine the subinterface for this packet, given the result of the
198 // vlan table lookups and vlan header parsing. Check the most specific
// matching subinterface first; delegates the actual match to
// eth_identify_subint and then applies the L3 my-mac filter and the
// down-subinterface check.
// NOTE(review): some parameter lines (b0, match_flags, ...) and brace
// lines are elided from this excerpt.
200 static_always_inline void
201 identify_subint (vnet_hw_interface_t * hi,
204 main_intf_t * main_intf,
205 vlan_intf_t * vlan_intf,
206 qinq_intf_t * qinq_intf,
207 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
211 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
212 qinq_intf, new_sw_if_index, error0, is_l2);
217 // Perform L3 my-mac filter
218 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
219 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
222 ethernet_header_t *e0;
223 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
// only unicast dmacs are checked; multicast/broadcast pass through
225 if (!(ethernet_address_cast (e0->dst_address)))
227 if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
229 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
234 // Check for down subinterface
235 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
// Choose the next graph node for a packet based on error state, L2/L3
// subinterface mode, and the innermost ethertype.  Also records
// l3_hdr_offset and, for L2, rewinds the buffer so the L2 header is intact.
// NOTE(review): brace lines, the is_l20 parameter line and a couple of
// statements (e.g. the *error0 assignment before line 292) are elided.
239 static_always_inline void
240 determine_next_node (ethernet_main_t * em,
241 ethernet_input_variant_t variant,
243 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
245 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
246 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
248 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
250 // some error occurred
251 *next0 = ETHERNET_INPUT_NEXT_DROP;
255 // record the L2 len and reset the buffer so the L2 header is preserved
256 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
257 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
258 *next0 = em->l2_next;
259 ASSERT (vnet_buffer (b0)->l2.l2_len ==
260 ethernet_buffer_header_size (b0));
261 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
263 // check for common IP/MPLS ethertypes
265 else if (type0 == ETHERNET_TYPE_IP4)
267 *next0 = em->l3_next.input_next_ip4;
269 else if (type0 == ETHERNET_TYPE_IP6)
271 *next0 = em->l3_next.input_next_ip6;
273 else if (type0 == ETHERNET_TYPE_MPLS)
275 *next0 = em->l3_next.input_next_mpls;
278 else if (em->redirect_l3)
280 // L3 Redirect is on, the cached common next nodes will be
281 // pointing to the redirect node, catch the uncommon types here
282 *next0 = em->redirect_l3_next;
286 // uncommon ethertype, check table
288 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
289 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
292 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
294 // The table is not populated with LLC values, so check that now.
295 // If variant is variant_ethernet_type then we came from LLC processing.
296 // Don't go back there; drop instead by keeping the drop/bad table result.
297 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
299 *next0 = ETHERNET_INPUT_NEXT_LLC;
305 /* following vector code relies on following assumptions */
/* eth_input_adv_and_flags_x4 below gathers the first 64 bits of each
   buffer's metadata (current_data, current_length, flags) and writes
   l2_hdr_offset/l3_hdr_offset as a single 32-bit store, so these layout
   invariants must hold. */
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
308 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
309 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
310 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
311 "l3_hdr_offset must follow l2_hdr_offset");
// Advance 4 buffers past the ethernet header (L3 mode) and set the
// l2/l3 header offsets + offset-valid flags; in L2 mode the buffer is not
// advanced and l2_len is set instead.  AVX2 path batches the metadata
// update; scalar fallback below does the same per buffer.
// NOTE(review): brace/blank lines and some declarations (r, radv, the
// #else/#endif markers, the is_l3/else scaffolding) are elided from this
// excerpt.
313 static_always_inline void
314 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
316 i16 adv = sizeof (ethernet_header_t);
317 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
318 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
320 #ifdef CLIB_HAVE_VEC256
321 /* to reduce number of small loads/stores we are loading first 64 bits
322 of each buffer metadata into 256-bit register so we can advance
323 current_data, current_length and flags.
324 Observed saving of this code is ~2 clocks per packet */
327 /* vector of signed 16 bit integers used in signed vector add operation
328 to advance current_data and current_length */
329 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
331 adv, -adv, 0, 0, adv, -adv, 0, 0,
332 adv, -adv, 0, 0, adv, -adv, 0, 0
335 /* load 4 x 64 bits */
336 r = u64x4_gather (b[0], b[1], b[2], b[3]);
342 radv = (u64x4) ((i16x16) r + adv4);
344 /* write 4 x 64 bits */
345 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
347 /* use old current_data as l2_hdr_offset and new current_data as
l3_hdr_offset */
349 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
351 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
352 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
355 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
/* debug-build sanity checks: L3 mode leaves current_data at the L3 header */
359 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
360 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
361 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
362 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
364 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
365 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
366 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
367 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
/* L2 mode leaves current_data at the L2 header */
371 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
372 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
373 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
374 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
376 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
377 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
378 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
379 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
/* scalar fallback: same metadata updates without SIMD */
383 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
384 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
385 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
386 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
387 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
388 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
389 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
390 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
394 vlib_buffer_advance (b[0], adv);
395 vlib_buffer_advance (b[1], adv);
396 vlib_buffer_advance (b[2], adv);
397 vlib_buffer_advance (b[3], adv);
400 b[0]->flags |= flags;
401 b[1]->flags |= flags;
402 b[2]->flags |= flags;
403 b[3]->flags |= flags;
408 vnet_buffer (b[0])->l2.l2_len = adv;
409 vnet_buffer (b[1])->l2.l2_len = adv;
410 vnet_buffer (b[2])->l2.l2_len = adv;
411 vnet_buffer (b[3])->l2.l2_len = adv;
// Single-buffer version of eth_input_adv_and_flags_x4: record l2/l3
// header offsets and flags, advance past the ethernet header in L3 mode,
// set l2_len in L2 mode.
// NOTE(review): the "if (is_l3)" / "if (!is_l3)" guard lines are elided
// from this excerpt (cf. the x4 variant above).
415 static_always_inline void
416 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
418 i16 adv = sizeof (ethernet_header_t);
419 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
420 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
422 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
423 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
426 vlib_buffer_advance (b[0], adv);
427 b[0]->flags |= flags;
429 vnet_buffer (b[0])->l2.l2_len = adv;
// Extract the ethertype, the 8 bytes following the ethernet header
// (candidate vlan tag data) and, when dmac_check is set, the first 8
// bytes of the header (dmac + start of smac) into the per-frame arrays
// at index `offset`.
// NOTE(review): #else/#endif lines and the dmac_check guard line are
// elided from this excerpt.
433 static_always_inline void
434 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
435 u64 * dmacs, int offset, int dmac_check)
437 ethernet_header_t *e;
438 e = vlib_buffer_get_current (b[offset]);
439 #ifdef CLIB_HAVE_VEC128
/* one unaligned 128-bit load covers the header; lane 3 is the type field */
440 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
441 etype[offset] = ((u16x8) r)[3];
444 etype[offset] = e->type;
445 tags[offset] = *(u64 *) (e + 1);
449 dmacs[offset] = *(u64 *) e;
// Map an (host-order) ethertype to the next-node index: values below
// 0x600 are 802.3 length fields and go to llc-input; everything else is
// looked up in the sparse per-ethertype next-node table.
// NOTE(review): brace lines are elided from this excerpt; fixed
// mis-encoded "&ethernet_main" (was mojibake "ðernet_main").
452 static_always_inline u16
453 eth_input_next_by_type (u16 etype)
455 ethernet_main_t *em = &ethernet_main;
457 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
458 vec_elt (em->l3_next.input_next_by_type,
459 sparse_vec_index (em->l3_next.input_next_by_type, etype));
/* Cached result of the last tagged-packet lookup plus per-subinterface
   packet/byte counters flushed by eth_input_update_if_counters.
   NOTE(review): the struct opener and leading members (tag, mask, next,
   err, type, len, adv, n_tags, sw_if_index — used below) are elided from
   this excerpt. */
469 u64 n_packets, n_bytes;
470 } eth_input_tag_lookup_t;
// Flush the accumulated RX packet/byte counters from a tag-lookup cache
// entry into the per-sw-interface combined counters; no-op when nothing
// was counted or no subinterface was resolved.
472 static_always_inline void
473 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
474 eth_input_tag_lookup_t * l)
476 if (l->n_packets == 0 || l->sw_if_index == ~0)
/* n_bytes counts payload; add the per-packet header length (l->len) */
480 l->n_bytes += l->n_packets * l->len;
482 vlib_increment_combined_counter
483 (vnm->interface_main.combined_sw_if_counters +
484 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
485 l->n_packets, l->n_bytes);
// Slow-path handler for one tagged packet: resolve the subinterface for
// its vlan/qinq tags (re-using the cached result in *l while the tag bytes
// covered by l->mask are unchanged), pick the next node, apply the dmac
// filter on L3 subinterfaces, fix up buffer metadata and accumulate
// counters.
// NOTE(review): brace/blank lines and several statements (vif/qif
// declarations, n_tags assignments, next[0] = l->next, counter increment)
// are elided from this excerpt; fixed mis-encoded "&ethernet_main" (was
// mojibake "ðernet_main").
488 static_always_inline void
489 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
490 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
491 u64 tag, u16 * next, vlib_buffer_t * b,
492 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
493 int main_is_l3, int check_dmac)
495 ethernet_main_t *em = &ethernet_main;
/* only redo the lookup when the masked tag bytes differ from the cache */
497 if ((tag ^ l->tag) & l->mask)
499 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
502 vlan_table_t *vlan_table;
503 qinq_table_t *qinq_table;
/* tag holds the raw (network-order) bytes after the ethernet header:
   outer TCI, outer type, inner TCI, inner type */
504 u16 *t = (u16 *) & tag;
505 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
506 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
507 u32 matched, is_l2, new_sw_if_index;
509 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
510 mif->dot1ad_vlans : mif->dot1q_vlans);
511 vif = &vlan_table->vlans[vlan1];
512 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
513 qif = &qinq_table->vlans[vlan2];
514 l->err = ETHERNET_ERROR_NONE;
515 l->type = clib_net_to_host_u16 (t[1]);
/* double-tagged: real ethertype is after the inner tag */
517 if (l->type == ETHERNET_TYPE_VLAN)
519 l->type = clib_net_to_host_u16 (t[3]);
521 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
522 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
523 qif, &new_sw_if_index, &l->err,
531 new_sw_if_index = hi->sw_if_index;
532 l->err = ETHERNET_ERROR_NONE;
534 is_l2 = main_is_l3 == 0;
537 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
538 SUBINT_CONFIG_MATCH_1_TAG, mif,
539 vif, qif, &new_sw_if_index,
/* flush counters when the resolved subinterface changed */
543 if (l->sw_if_index != new_sw_if_index)
545 eth_input_update_if_counters (vm, vnm, l);
548 l->sw_if_index = new_sw_if_index;
/* cache validity mask: both tags for qinq, outer tag only otherwise */
551 l->mask = (l->n_tags == 2) ?
552 clib_net_to_host_u64 (0xffffffffffffffff) :
553 clib_net_to_host_u64 (0xffffffff00000000);
555 if (matched && l->sw_if_index == ~0)
556 l->err = ETHERNET_ERROR_DOWN;
558 l->len = sizeof (ethernet_header_t) +
559 l->n_tags * sizeof (ethernet_vlan_header_t);
/* L2: rewind to the ethernet header; L3: advance past the vlan tags */
561 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
562 l->n_tags * sizeof (ethernet_vlan_header_t);
564 l->adv = is_l2 ? 0 : l->len;
566 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
567 l->next = ETHERNET_INPUT_NEXT_DROP;
569 l->next = em->l2_next;
570 else if (l->type == ETHERNET_TYPE_IP4)
571 l->next = em->l3_next.input_next_ip4;
572 else if (l->type == ETHERNET_TYPE_IP6)
573 l->next = em->l3_next.input_next_ip6;
574 else if (l->type == ETHERNET_TYPE_MPLS)
575 l->next = em->l3_next.input_next_mpls;
576 else if (em->redirect_l3)
577 l->next = em->redirect_l3_next;
580 l->next = eth_input_next_by_type (l->type);
581 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
582 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
/* l->adv > 0 means an L3 subinterface: enforce the my-mac filter */
586 if (check_dmac && l->adv > 0 && dmac_bad)
588 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
589 next[0] = ETHERNET_INPUT_NEXT_PUNT;
594 vlib_buffer_advance (b, l->adv);
595 vnet_buffer (b)->l2.l2_len = l->len;
596 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
598 if (l->err == ETHERNET_ERROR_NONE)
600 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
601 ethernet_buffer_set_vlan_count (b, l->n_tags);
604 b->error = node->errors[l->err];
606 /* update counters */
608 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
// For each packet's first 8 header bytes in dmacs[], set dmacs_bad[i]
// non-zero when the dmac is unicast (I/G bit clear) AND does not match
// the interface MAC.  AVX2 path processes 8 dmacs per iteration; scalar
// tail handles 4 at a time.
// NOTE(review): the n_packets parameter line, loop scaffolding, r0..r3
// declarations/loads and #else/#endif lines are elided from this excerpt.
611 static_always_inline void
612 eth_input_process_frame_dmac_check (vnet_hw_interface_t * hi,
613 u64 * dmacs, u8 * dmacs_bad,
/* mask selects the 6 MAC bytes of the little-endian u64 header load;
   igbit is the individual/group (multicast) bit of the dmac */
616 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
617 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
618 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
620 u8 *dmac_bad = dmacs_bad;
622 i32 n_left = n_packets;
624 #ifdef CLIB_HAVE_VEC256
625 u64x4 igbit4 = u64x4_splat (igbit);
626 u64x4 mask4 = u64x4_splat (mask);
627 u64x4 hwaddr4 = u64x4_splat (hwaddr);
631 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
632 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
/* bad = (unicast) && (not our MAC) */
634 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
635 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
637 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
638 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
/* scalar path: same predicate per packet */
655 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
656 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
657 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
658 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
673 /* process frame of buffers, store ethertype into array and update
674 buffer metadata fields depending on interface being l2 or l3 assuming that
675 packets are untagged. For tagged packets those fields are updated later.
676 Optionally store Destionation MAC address and tag data into arrays
677 for further processing */
679 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
680 "VLIB_FRAME_SIZE must be power of 8");
681 static_always_inline void
682 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
683 vnet_hw_interface_t * hi,
684 u32 * buffer_indices, u32 n_packets, int main_is_l3,
685 int ip4_cksum_ok, int dmac_check)
687 ethernet_main_t *em = ðernet_main;
688 u16 nexts[VLIB_FRAME_SIZE], *next;
689 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
690 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
691 u8 dmacs_bad[VLIB_FRAME_SIZE];
692 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
693 u16 slowpath_indices[VLIB_FRAME_SIZE];
695 u16 next_ip4, next_ip6, next_mpls, next_l2;
696 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
697 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
698 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
699 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
700 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
701 i32 n_left = n_packets;
702 vlib_buffer_t *b[20];
705 from = buffer_indices;
709 vlib_buffer_t **ph = b + 16, **pd = b + 8;
710 vlib_get_buffers (vm, from, b, 4);
711 vlib_get_buffers (vm, from + 8, pd, 4);
712 vlib_get_buffers (vm, from + 16, ph, 4);
714 vlib_prefetch_buffer_header (ph[0], LOAD);
715 vlib_prefetch_buffer_data (pd[0], LOAD);
716 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
718 vlib_prefetch_buffer_header (ph[1], LOAD);
719 vlib_prefetch_buffer_data (pd[1], LOAD);
720 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
722 vlib_prefetch_buffer_header (ph[2], LOAD);
723 vlib_prefetch_buffer_data (pd[2], LOAD);
724 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
726 vlib_prefetch_buffer_header (ph[3], LOAD);
727 vlib_prefetch_buffer_data (pd[3], LOAD);
728 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
730 eth_input_adv_and_flags_x4 (b, main_is_l3);
741 vlib_get_buffers (vm, from, b, 4);
742 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
743 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
744 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
745 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
746 eth_input_adv_and_flags_x4 (b, main_is_l3);
757 vlib_get_buffers (vm, from, b, 1);
758 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
759 eth_input_adv_and_flags_x1 (b, main_is_l3);
770 eth_input_process_frame_dmac_check (hi, dmacs, dmacs_bad, n_packets);
772 next_ip4 = em->l3_next.input_next_ip4;
773 next_ip6 = em->l3_next.input_next_ip6;
774 next_mpls = em->l3_next.input_next_mpls;
775 next_l2 = em->l2_next;
777 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
778 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
780 #ifdef CLIB_HAVE_VEC256
781 u16x16 et16_ip4 = u16x16_splat (et_ip4);
782 u16x16 et16_ip6 = u16x16_splat (et_ip6);
783 u16x16 et16_mpls = u16x16_splat (et_mpls);
784 u16x16 et16_vlan = u16x16_splat (et_vlan);
785 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
786 u16x16 next16_ip4 = u16x16_splat (next_ip4);
787 u16x16 next16_ip6 = u16x16_splat (next_ip6);
788 u16x16 next16_mpls = u16x16_splat (next_mpls);
789 u16x16 next16_l2 = u16x16_splat (next_l2);
791 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
800 /* fastpath - in l3 mode hadles ip4, ip6 and mpls packets, other packets
801 are considered as slowpath, in l2 mode all untagged packets are
802 considered as fastpath */
805 #ifdef CLIB_HAVE_VEC256
809 u16x16 e16 = u16x16_load_unaligned (etype);
812 r += (e16 == et16_ip4) & next16_ip4;
813 r += (e16 == et16_ip6) & next16_ip6;
814 r += (e16 == et16_mpls) & next16_mpls;
817 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
818 u16x16_store_unaligned (r, next);
820 if (!u16x16_is_all_zero (r == zero))
822 if (u16x16_is_all_zero (r))
824 u16x16_store_unaligned (u16x16_splat (i) + stairs,
825 slowpath_indices + n_slowpath);
830 for (int j = 0; j < 16; j++)
832 slowpath_indices[n_slowpath++] = i + j;
843 if (main_is_l3 && etype[0] == et_ip4)
845 else if (main_is_l3 && etype[0] == et_ip6)
847 else if (main_is_l3 && etype[0] == et_mpls)
849 else if (main_is_l3 == 0 &&
850 etype[0] != et_vlan && etype[0] != et_dot1ad)
855 slowpath_indices[n_slowpath++] = i;
866 vnet_main_t *vnm = vnet_get_main ();
868 u16 *si = slowpath_indices;
869 u32 last_unknown_etype = ~0;
870 u32 last_unknown_next = ~0;
871 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
873 .tag = tags[si[0]] ^ -1LL,
877 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
882 u16 etype = etypes[i];
884 if (etype == et_vlan)
886 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
887 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
888 &dot1q_lookup, dmacs_bad[i], 0,
889 main_is_l3, dmac_check);
892 else if (etype == et_dot1ad)
894 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
895 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
896 &dot1ad_lookup, dmacs_bad[i], 1,
897 main_is_l3, dmac_check);
901 /* untagged packet with not well known etyertype */
902 if (last_unknown_etype != etype)
904 last_unknown_etype = etype;
905 etype = clib_host_to_net_u16 (etype);
906 last_unknown_next = eth_input_next_by_type (etype);
908 if (dmac_check && main_is_l3 && dmacs_bad[i])
910 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
911 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
912 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
915 nexts[i] = last_unknown_next;
923 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
924 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
927 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
// Dispatch one frame from a single interface into eth_input_process_frame,
// choosing L2/L3 mode and whether the DMAC check is needed based on the
// untagged-subint config and promiscuous state.
// NOTE(review): brace lines, the ip4_cksum_ok parameter and is_l3/
// subinterface-count condition lines are elided from this excerpt; fixed
// mis-encoded "&ethernet_main" (was mojibake "ðernet_main") and comment
// typos.
930 static_always_inline void
931 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
932 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
935 ethernet_main_t *em = &ethernet_main;
936 ethernet_interface_t *ei;
937 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
938 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
939 subint_config_t *subint0 = &intf0->untagged_subint;
941 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
942 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
946 /* main interface is L3, we don't expect tagged packets and interface
947 is not in promisc mode, so we don't need to check DMAC */
951 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
954 /* subinterfaces and promisc mode so DMAC check is needed */
955 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
961 /* untagged packets are treated as L2 */
963 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
// Record per-packet trace entries when node tracing is enabled, and feed
// packets into the RX pcap capture when that is enabled (optionally
// filtered by a classifier table and/or a specific sw_if_index).
// NOTE(review): brace lines, loop headers, from/n_left declarations and
// some pcap_add_buffer argument lines are elided from this excerpt.
969 static_always_inline void
970 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
971 vlib_frame_t * from_frame)
974 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
976 from = vlib_frame_vector_args (from_frame);
977 n_left = from_frame->n_vectors;
981 ethernet_input_trace_t *t0;
982 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
984 if (b0->flags & VLIB_BUFFER_IS_TRACED)
986 t0 = vlib_add_trace (vm, node, b0,
987 sizeof (ethernet_input_trace_t));
988 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
989 sizeof (t0->packet_data));
/* capture the frame-level flags and scalar args (hw/sw if-index) */
990 t0->frame_flags = from_frame->flags;
991 clib_memcpy_fast (&t0->frame_data,
992 vlib_frame_scalar_args (from_frame),
993 sizeof (ethernet_input_frame_t));
1000 /* rx pcap capture if enabled */
1001 if (PREDICT_FALSE (vlib_global_main.pcap[VLIB_RX].pcap_enable))
1004 vnet_main_t *vnm = vnet_get_main ();
1006 from = vlib_frame_vector_args (from_frame);
1007 n_left = from_frame->n_vectors;
1010 int classify_filter_result;
1015 b0 = vlib_get_buffer (vm, bi0);
/* when a classify filter is configured, capture only matching packets */
1016 if (vec_len (vnm->classify_filter_table_indices))
1018 classify_filter_result =
1019 vnet_is_packet_traced_inline
1020 (b0, vnm->classify_filter_table_indices[0],
1021 0 /* full classify */ );
1022 if (classify_filter_result)
1023 pcap_add_buffer (&vlib_global_main.pcap[VLIB_RX].pcap_main,
/* otherwise capture all, or only the configured sw_if_index */
1028 if (vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index == 0 ||
1029 vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index
1030 == vnet_buffer (b0)->sw_if_index[VLIB_RX])
1032 pcap_add_buffer (&vlib_global_main.pcap[VLIB_RX].pcap_main, vm,
1039 static_always_inline void
1040 ethernet_input_inline (vlib_main_t * vm,
1041 vlib_node_runtime_t * node,
1042 u32 * from, u32 n_packets,
1043 ethernet_input_variant_t variant)
1045 vnet_main_t *vnm = vnet_get_main ();
1046 ethernet_main_t *em = ðernet_main;
1047 vlib_node_runtime_t *error_node;
1048 u32 n_left_from, next_index, *to_next;
1049 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1050 u32 thread_index = vm->thread_index;
1051 u32 cached_sw_if_index = ~0;
1052 u32 cached_is_l2 = 0; /* shut up gcc */
1053 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1054 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
1055 vlib_buffer_t **b = bufs;
1057 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1058 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1062 n_left_from = n_packets;
1064 next_index = node->cached_next_index;
1065 stats_sw_if_index = node->runtime_data[0];
1066 stats_n_packets = stats_n_bytes = 0;
1067 vlib_get_buffers (vm, from, bufs, n_left_from);
1069 while (n_left_from > 0)
1073 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1075 while (n_left_from >= 4 && n_left_to_next >= 2)
1078 vlib_buffer_t *b0, *b1;
1079 u8 next0, next1, error0, error1;
1080 u16 type0, orig_type0, type1, orig_type1;
1081 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1082 u32 match_flags0, match_flags1;
1083 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1084 new_sw_if_index1, len1;
1085 vnet_hw_interface_t *hi0, *hi1;
1086 main_intf_t *main_intf0, *main_intf1;
1087 vlan_intf_t *vlan_intf0, *vlan_intf1;
1088 qinq_intf_t *qinq_intf0, *qinq_intf1;
1090 ethernet_header_t *e0, *e1;
1092 /* Prefetch next iteration. */
1094 vlib_prefetch_buffer_header (b[2], STORE);
1095 vlib_prefetch_buffer_header (b[3], STORE);
1097 CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
1098 CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
1107 n_left_to_next -= 2;
1114 error0 = error1 = ETHERNET_ERROR_NONE;
1115 e0 = vlib_buffer_get_current (b0);
1116 type0 = clib_net_to_host_u16 (e0->type);
1117 e1 = vlib_buffer_get_current (b1);
1118 type1 = clib_net_to_host_u16 (e1->type);
1120 /* Set the L2 header offset for all packets */
1121 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1122 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1123 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1124 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1126 /* Speed-path for the untagged case */
1127 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1128 && !ethernet_frame_is_any_tagged_x2 (type0,
1132 subint_config_t *subint0;
1133 u32 sw_if_index0, sw_if_index1;
1135 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1136 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1137 is_l20 = cached_is_l2;
1139 /* This is probably wholly unnecessary */
1140 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1143 /* Now sw_if_index0 == sw_if_index1 */
1144 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1146 cached_sw_if_index = sw_if_index0;
1147 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1148 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1149 subint0 = &intf0->untagged_subint;
1150 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1153 if (PREDICT_TRUE (is_l20 != 0))
1155 vnet_buffer (b0)->l3_hdr_offset =
1156 vnet_buffer (b0)->l2_hdr_offset +
1157 sizeof (ethernet_header_t);
1158 vnet_buffer (b1)->l3_hdr_offset =
1159 vnet_buffer (b1)->l2_hdr_offset +
1160 sizeof (ethernet_header_t);
1161 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1162 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1163 next0 = em->l2_next;
1164 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1165 next1 = em->l2_next;
1166 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1170 if (!ethernet_address_cast (e0->dst_address) &&
1171 (hi->hw_address != 0) &&
1172 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1173 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1174 if (!ethernet_address_cast (e1->dst_address) &&
1175 (hi->hw_address != 0) &&
1176 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1177 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1178 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1179 determine_next_node (em, variant, 0, type0, b0,
1181 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1182 determine_next_node (em, variant, 0, type1, b1,
1188 /* Slow-path for the tagged case */
1190 parse_header (variant,
1193 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1195 parse_header (variant,
1198 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1200 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1201 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1203 eth_vlan_table_lookups (em,
1210 &main_intf0, &vlan_intf0, &qinq_intf0);
1212 eth_vlan_table_lookups (em,
1219 &main_intf1, &vlan_intf1, &qinq_intf1);
1221 identify_subint (hi0,
1226 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1228 identify_subint (hi1,
1233 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1235 // Save RX sw_if_index for later nodes
1236 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1238 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1239 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1241 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1243 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1244 if (((new_sw_if_index0 != ~0)
1245 && (new_sw_if_index0 != old_sw_if_index0))
1246 || ((new_sw_if_index1 != ~0)
1247 && (new_sw_if_index1 != old_sw_if_index1)))
1250 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1251 - vnet_buffer (b0)->l2_hdr_offset;
1252 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1253 - vnet_buffer (b1)->l2_hdr_offset;
1255 stats_n_packets += 2;
1256 stats_n_bytes += len0 + len1;
1259 (!(new_sw_if_index0 == stats_sw_if_index
1260 && new_sw_if_index1 == stats_sw_if_index)))
1262 stats_n_packets -= 2;
1263 stats_n_bytes -= len0 + len1;
1265 if (new_sw_if_index0 != old_sw_if_index0
1266 && new_sw_if_index0 != ~0)
1267 vlib_increment_combined_counter (vnm->
1268 interface_main.combined_sw_if_counters
1270 VNET_INTERFACE_COUNTER_RX,
1272 new_sw_if_index0, 1,
1274 if (new_sw_if_index1 != old_sw_if_index1
1275 && new_sw_if_index1 != ~0)
1276 vlib_increment_combined_counter (vnm->
1277 interface_main.combined_sw_if_counters
1279 VNET_INTERFACE_COUNTER_RX,
1281 new_sw_if_index1, 1,
1284 if (new_sw_if_index0 == new_sw_if_index1)
1286 if (stats_n_packets > 0)
1288 vlib_increment_combined_counter
1289 (vnm->interface_main.combined_sw_if_counters
1290 + VNET_INTERFACE_COUNTER_RX,
1293 stats_n_packets, stats_n_bytes);
1294 stats_n_packets = stats_n_bytes = 0;
1296 stats_sw_if_index = new_sw_if_index0;
1301 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1302 is_l20 = is_l21 = 0;
1304 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1306 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1310 b0->error = error_node->errors[error0];
1311 b1->error = error_node->errors[error1];
1313 // verify speculative enqueue
1314 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1315 n_left_to_next, bi0, bi1, next0,
1319 while (n_left_from > 0 && n_left_to_next > 0)
1324 u16 type0, orig_type0;
1325 u16 outer_id0, inner_id0;
1327 u32 old_sw_if_index0, new_sw_if_index0, len0;
1328 vnet_hw_interface_t *hi0;
1329 main_intf_t *main_intf0;
1330 vlan_intf_t *vlan_intf0;
1331 qinq_intf_t *qinq_intf0;
1332 ethernet_header_t *e0;
1335 // Prefetch next iteration
1336 if (n_left_from > 1)
1338 vlib_prefetch_buffer_header (b[1], STORE);
1339 CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1347 n_left_to_next -= 1;
1352 error0 = ETHERNET_ERROR_NONE;
1353 e0 = vlib_buffer_get_current (b0);
1354 type0 = clib_net_to_host_u16 (e0->type);
1356 /* Set the L2 header offset for all packets */
1357 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1358 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1360 /* Speed-path for the untagged case */
1361 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1362 && !ethernet_frame_is_tagged (type0)))
1365 subint_config_t *subint0;
1368 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1369 is_l20 = cached_is_l2;
1371 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1373 cached_sw_if_index = sw_if_index0;
1374 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1375 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1376 subint0 = &intf0->untagged_subint;
1377 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1381 if (PREDICT_TRUE (is_l20 != 0))
1383 vnet_buffer (b0)->l3_hdr_offset =
1384 vnet_buffer (b0)->l2_hdr_offset +
1385 sizeof (ethernet_header_t);
1386 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1387 next0 = em->l2_next;
1388 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1392 if (!ethernet_address_cast (e0->dst_address) &&
1393 (hi->hw_address != 0) &&
1394 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1395 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1396 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1397 determine_next_node (em, variant, 0, type0, b0,
1403 /* Slow-path for the tagged case */
1404 parse_header (variant,
1407 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1409 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1411 eth_vlan_table_lookups (em,
1418 &main_intf0, &vlan_intf0, &qinq_intf0);
1420 identify_subint (hi0,
1425 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1427 // Save RX sw_if_index for later nodes
1428 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1430 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1432 // Increment subinterface stats
1433 // Note that interface-level counters have already been incremented
1434 // prior to calling this function. Thus only subinterface counters
1435 // are incremented here.
1437 // Interface level counters include packets received on the main
1438 // interface and all subinterfaces. Subinterface level counters
1439 // include only those packets received on that subinterface
1440 // Increment stats if the subint is valid and it is not the main intf
1441 if ((new_sw_if_index0 != ~0)
1442 && (new_sw_if_index0 != old_sw_if_index0))
1445 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1446 - vnet_buffer (b0)->l2_hdr_offset;
1448 stats_n_packets += 1;
1449 stats_n_bytes += len0;
1451 // Batch stat increments from the same subinterface so counters
1452 // don't need to be incremented for every packet.
1453 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1455 stats_n_packets -= 1;
1456 stats_n_bytes -= len0;
1458 if (new_sw_if_index0 != ~0)
1459 vlib_increment_combined_counter
1460 (vnm->interface_main.combined_sw_if_counters
1461 + VNET_INTERFACE_COUNTER_RX,
1462 thread_index, new_sw_if_index0, 1, len0);
1463 if (stats_n_packets > 0)
1465 vlib_increment_combined_counter
1466 (vnm->interface_main.combined_sw_if_counters
1467 + VNET_INTERFACE_COUNTER_RX,
1469 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1470 stats_n_packets = stats_n_bytes = 0;
1472 stats_sw_if_index = new_sw_if_index0;
1476 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1479 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1483 b0->error = error_node->errors[error0];
1485 // verify speculative enqueue
1486 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1487 to_next, n_left_to_next,
1491 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1494 // Increment any remaining batched stats
1495 if (stats_n_packets > 0)
1497 vlib_increment_combined_counter
1498 (vnm->interface_main.combined_sw_if_counters
1499 + VNET_INTERFACE_COUNTER_RX,
1500 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1501 node->runtime_data[0] = stats_sw_if_index;
/* ethernet-input node dispatch function.
 * When the frame carries the SINGLE_SW_IF_IDX hint, every packet in the
 * frame arrived on the same hardware interface, so the optimized
 * single-interface path is taken; otherwise the generic per-packet
 * handler runs with the plain ETHERNET variant.
 * NOTE(review): this extraction has elided lines (gaps in the embedded
 * numbering); comments describe only the visible logic. */
1505 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1506 vlib_node_runtime_t * node,
1507 vlib_frame_t * frame)
1509 vnet_main_t *vnm = vnet_get_main ();
1510 u32 *from = vlib_frame_vector_args (frame);
1511 u32 n_packets = frame->n_vectors;
/* Record traces for this frame before any processing */
1513 ethernet_input_trace (vm, node, frame);
1515 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
/* Fast path: frame scalar args carry the single RX hw_if_index */
1517 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1518 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1519 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1520 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
/* Generic path: per-packet interface handling */
1523 ethernet_input_inline (vm, node, from, n_packets,
1524 ETHERNET_INPUT_VARIANT_ETHERNET);
/* ethernet-input-type node dispatch function: same generic handler as
 * ethernet-input, but with the ETHERNET_TYPE variant (callers feed packets
 * whose ethertype handling differs from the plain variant). */
1528 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1529 vlib_node_runtime_t * node,
1530 vlib_frame_t * from_frame)
1532 u32 *from = vlib_frame_vector_args (from_frame);
1533 u32 n_packets = from_frame->n_vectors;
1534 ethernet_input_trace (vm, node, from_frame);
1535 ethernet_input_inline (vm, node, from, n_packets,
1536 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
/* ethernet-input-not-l2 node dispatch function: generic handler with the
 * NOT_L2 variant, which (as seen in the inline handler above) forces
 * is_l2 to 0 so packets are never sent down the L2 path. */
1540 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1541 vlib_node_runtime_t * node,
1542 vlib_frame_t * from_frame)
1544 u32 *from = vlib_frame_vector_args (from_frame);
1545 u32 n_packets = from_frame->n_vectors;
1546 ethernet_input_trace (vm, node, from_frame);
1547 ethernet_input_inline (vm, node, from, n_packets,
1548 ETHERNET_INPUT_VARIANT_NOT_L2);
1553 // Return the subinterface config struct for the given sw_if_index
1554 // Also return via parameter the appropriate match flags for the
1555 // configured number of tags.
1556 // On error (unsupported or not ethernet) return 0.
//
// NOTE(review): this extraction has elided lines (gaps in the embedded
// numbering) and at least one mojibake artifact ("ðernet_main" is a
// mangled "&ethernet_main"); comments below describe only the visible
// logic and leave the code bytes untouched.
1557 static subint_config_t *
1558 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1560 u32 * flags, u32 * unsupported)
1562 ethernet_main_t *em = ðernet_main;
1563 vnet_hw_interface_t *hi;
1564 vnet_sw_interface_t *si;
1565 main_intf_t *main_intf;
1566 vlan_table_t *vlan_table;
1567 qinq_table_t *qinq_table;
1568 subint_config_t *subint = 0;
/* Resolve the supporting hardware interface; bail out for anything that
 * is not an ethernet hardware class */
1570 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1572 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1575 goto done; // non-ethernet interface
1578 // ensure there's an entry for the main intf (shouldn't really be necessary)
1579 vec_validate (em->main_intfs, hi->hw_if_index);
1580 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1582 // Locate the subint for the given ethernet config
1583 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Case 1: P2P ethernet subinterface — config lives in the p2p pool,
 * keyed by the client MAC lookup; allocate on first sight */
1585 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1587 p2p_ethernet_main_t *p2pm = &p2p_main;
1588 u32 p2pe_sw_if_index =
1589 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1590 if (p2pe_sw_if_index == ~0)
1592 pool_get (p2pm->p2p_subif_pool, subint);
1593 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1596 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1597 *flags = SUBINT_CONFIG_P2P;
/* Case 2: pipe interface — config is embedded in the pipe object */
1599 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1603 pipe = pipe_get (sw_if_index);
1604 subint = &pipe->subint;
1605 *flags = SUBINT_CONFIG_P2P;
/* Case 3: "default" subinterface — catches any tagged packet that does
 * not match a more specific subinterface */
1607 else if (si->sub.eth.flags.default_sub)
1609 subint = &main_intf->default_subint;
1610 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1611 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
/* Case 4: untagged / main interface */
1613 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1615 // if no flags are set then this is a main interface
1616 // so treat as untagged
1617 subint = &main_intf->untagged_subint;
1618 *flags = SUBINT_CONFIG_MATCH_0_TAG;
/* Case 5 (tagged): pick the dot1ad or dot1q vlan table, allocating it
 * from the shared pool on first use (tables are addressed by pool index,
 * with index 0 reserved as invalid — see ethernet_input_init) */
1623 // first get the vlan table
1624 if (si->sub.eth.flags.dot1ad)
1626 if (main_intf->dot1ad_vlans == 0)
1628 // Allocate a vlan table from the pool
1629 pool_get (em->vlan_pool, vlan_table);
1630 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1634 // Get ptr to existing vlan table
1636 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1641 if (main_intf->dot1q_vlans == 0)
1643 // Allocate a vlan table from the pool
1644 pool_get (em->vlan_pool, vlan_table);
1645 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1649 // Get ptr to existing vlan table
1651 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
/* Single-tag subinterface: non-exact match also accepts extra tags */
1655 if (si->sub.eth.flags.one_tag)
1657 *flags = si->sub.eth.flags.exact_match ?
1658 SUBINT_CONFIG_MATCH_1_TAG :
1659 (SUBINT_CONFIG_MATCH_1_TAG |
1660 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1662 if (si->sub.eth.flags.outer_vlan_id_any)
1664 // not implemented yet
1670 // a single vlan, a common case
1672 &vlan_table->vlans[si->sub.eth.
1673 outer_vlan_id].single_tag_subint;
/* Two-tag subinterface: exact or loose match on the inner tag count */
1680 *flags = si->sub.eth.flags.exact_match ?
1681 SUBINT_CONFIG_MATCH_2_TAG :
1682 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1684 if (si->sub.eth.flags.outer_vlan_id_any
1685 && si->sub.eth.flags.inner_vlan_id_any)
1687 // not implemented yet
1692 if (si->sub.eth.flags.inner_vlan_id_any)
1694 // a specific outer and "any" inner
1695 // don't need a qinq table for this
1697 &vlan_table->vlans[si->sub.eth.
1698 outer_vlan_id].inner_any_subint;
1699 if (si->sub.eth.flags.exact_match)
1701 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1705 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1706 SUBINT_CONFIG_MATCH_3_TAG;
1711 // a specific outer + specifc innner vlan id, a common case
/* Lazily allocate the per-outer-vlan qinq table, same pool-index scheme
 * as the vlan tables above */
1713 // get the qinq table
1714 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1716 // Allocate a qinq table from the pool
1717 pool_get (em->qinq_pool, qinq_table);
1718 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1719 qinq_table - em->qinq_pool;
1723 // Get ptr to existing qinq table
1725 vec_elt_at_index (em->qinq_pool,
1726 vlan_table->vlans[si->sub.
1730 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
/* Admin up/down callback for ethernet software interfaces.
 * Records the sw_if_index in the subinterface config when the interface
 * goes up, and resets it to ~0 when it goes down, so the input path can
 * detect down interfaces. Registered below via
 * VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION. */
1739 static clib_error_t *
1740 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1742 subint_config_t *subint;
1745 clib_error_t *error = 0;
1747 // Find the config for this subinterface
1749 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1754 // not implemented yet or not ethernet
/* Remember our sw_if_index only while admin-up; ~0 marks "down" */
1758 subint->sw_if_index =
1759 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1765 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1768 #ifndef CLIB_MARCH_VARIANT
1769 // Set the L2/L3 mode for the subinterface
/* Sets or clears SUBINT_CONFIG_L2 in the subinterface config found via
 * ethernet_sw_interface_get_config. For a main interface ("port",
 * i.e. not a TYPE_SUB interface) the tag-match flags are also widened
 * (L2 mode) or narrowed (L3 mode) — see the flag updates below. */
1771 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1773 subint_config_t *subint;
1777 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
/* A "port" is anything that is not a subinterface */
1779 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1781 // Find the config for this subinterface
1783 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1788 // unimplemented or not ethernet
1792 // Double check that the config we found is for our interface (or the interface is down)
1793 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
/* L2 mode: an L2 port matches any number of tags */
1797 subint->flags |= SUBINT_CONFIG_L2;
1800 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1801 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
/* L3 mode: drop the tagged-match flags again */
1805 subint->flags &= ~SUBINT_CONFIG_L2;
1808 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1809 | SUBINT_CONFIG_MATCH_3_TAG);
1817 * Set the L2/L3 mode for the subinterface regardless of port
/* Same as ethernet_sw_interface_set_l2_mode but without the port-specific
 * tag-match flag adjustments: only SUBINT_CONFIG_L2 is toggled. */
1820 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1821 u32 sw_if_index, u32 l2)
1823 subint_config_t *subint;
1827 /* Find the config for this subinterface */
1829 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1834 /* unimplemented or not ethernet */
1839 * Double check that the config we found is for our interface (or the
1840 * interface is down)
1842 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1846 subint->flags |= SUBINT_CONFIG_L2;
1850 subint->flags &= ~SUBINT_CONFIG_L2;
/* Add/delete callback for ethernet software (sub)interfaces.
 * On create: validates that the vlan config is not already claimed,
 * then initializes the subinterface config (L3 by default, interface
 * initially down). Registered below via
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION. */
1858 static clib_error_t *
1859 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1860 u32 sw_if_index, u32 is_create)
1862 clib_error_t *error = 0;
1863 subint_config_t *subint;
1865 u32 unsupported = 0;
1867 // Find the config for this subinterface
1869 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1874 // not implemented yet or not ethernet
1877 // this is the NYI case
1878 error = clib_error_return (0, "not implemented yet");
1889 // Initialize the subint
1890 if (subint->flags & SUBINT_CONFIG_VALID)
1892 // Error vlan already in use
1893 error = clib_error_return (0, "vlan is already in use");
1897 // Note that config is L3 by default
1898 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1899 subint->sw_if_index = ~0; // because interfaces are initially down
1906 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
/* Error string table for the ethernet input nodes; each entry is
 * generated from the ethernet_error() entries in error.def via the
 * temporary macro below. */
1908 static char *ethernet_error_strings[] = {
1909 #define ethernet_error(n,c,s) s,
1910 #include "error.def"
1911 #undef ethernet_error
/* Registration for the main ethernet-input node. Note the scalar_size:
 * frames for this node carry an ethernet_input_frame_t scalar (used by
 * the SINGLE_SW_IF_IDX fast path in the node function above). */
1915 VLIB_REGISTER_NODE (ethernet_input_node) = {
1916 .name = "ethernet-input",
1917 /* Takes a vector of packets. */
1918 .vector_size = sizeof (u32),
1919 .scalar_size = sizeof (ethernet_input_frame_t),
1920 .n_errors = ETHERNET_N_ERROR,
1921 .error_strings = ethernet_error_strings,
1922 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
/* Expand the canonical next-node list shared by all three input nodes */
1924 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1925 foreach_ethernet_input_next
1928 .format_buffer = format_ethernet_header_with_length,
1929 .format_trace = format_ethernet_input_trace,
1930 .unformat_buffer = unformat_ethernet_header,
/* Registration for ethernet-input-type; shares the same next-node list
 * as ethernet-input so next indices stay aligned across the nodes. */
1933 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1934 .name = "ethernet-input-type",
1935 /* Takes a vector of packets. */
1936 .vector_size = sizeof (u32),
1937 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1939 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1940 foreach_ethernet_input_next
/* Registration for ethernet-input-not-l2; again the same next-node list,
 * keeping next indices consistent with the other two input nodes. */
1945 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1946 .name = "ethernet-input-not-l2",
1947 /* Takes a vector of packets. */
1948 .vector_size = sizeof (u32),
1949 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1951 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1952 foreach_ethernet_input_next
1958 #ifndef CLIB_MARCH_VARIANT
/* Enable/disable RX redirect for a hardware interface: when enabled,
 * all received packets are forced through ethernet-input; when disabled
 * (~0) the redirect is removed. */
1960 ethernet_set_rx_redirect (vnet_main_t * vnm,
1961 vnet_hw_interface_t * hi, u32 enable)
1963 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1964 // don't go directly to ip4-input)
1965 vnet_hw_interface_rx_redirect_to_node
1966 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1971 * Initialization and registration for the next_by_ethernet structure
/* Builds the ethertype -> next-index sparse vector (indexed by the full
 * 16-bit ethertype) and seeds the inverse mapping with DROP and PUNT
 * marked as invalid sparse indices. */
1975 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1977 l3_next->input_next_by_type = sparse_vec_new
1978 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1979 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1981 vec_validate (l3_next->sparse_index_by_input_next_index,
1982 ETHERNET_INPUT_NEXT_DROP);
1983 vec_validate (l3_next->sparse_index_by_input_next_index,
1984 ETHERNET_INPUT_NEXT_PUNT);
1985 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1986 SPARSE_VEC_INVALID_INDEX;
1987 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1988 SPARSE_VEC_INVALID_INDEX;
1991 * Make sure we don't wipe out an ethernet registration by mistake
1992 * Can happen if init function ordering constraints are missing.
/* NOTE(review): "ðernet_main" below is a mojibake of "&ethernet_main"
 * introduced by the extraction; left byte-identical here. */
1996 ethernet_main_t *em = ðernet_main;
1997 ASSERT (em->next_by_ethertype_register_called == 0);
2003 // Add an ethertype -> next index mapping to the structure
/* Also rebuilds the inverse (next index -> sparse index) mapping, since
 * validating the sparse vector can move entries, and refreshes the
 * cached fast-path next indices for IP4/IP6/MPLS — unless L3 redirect
 * is active, in which case the caches intentionally keep pointing at
 * the redirect node. */
2005 next_by_ethertype_register (next_by_ethertype_t * l3_next,
2006 u32 ethertype, u32 next_index)
2010 ethernet_main_t *em = ðernet_main;
/* NOTE(review): "ðernet_main" is a mojibake of "&ethernet_main"
 * from the extraction; code left byte-identical. */
2014 ethernet_main_t *em = ðernet_main;
/* Flag that a registration happened (checked by next_by_ethertype_init) */
2015 em->next_by_ethertype_register_called = 1;
2018 /* Setup ethernet type -> next index sparse vector mapping. */
2019 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2022 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2024 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
/* Start at 1: sparse index 0 is reserved/invalid */
2025 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2027 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2029 // do not allow the cached next index's to be updated if L3
2030 // redirect is enabled, as it will have overwritten them
2031 if (!em->redirect_l3)
2033 // Cache common ethertypes directly
2034 if (ethertype == ETHERNET_TYPE_IP4)
2036 l3_next->input_next_ip4 = next_index;
2038 else if (ethertype == ETHERNET_TYPE_IP6)
2040 l3_next->input_next_ip6 = next_index;
2042 else if (ethertype == ETHERNET_TYPE_MPLS)
2044 l3_next->input_next_mpls = next_index;
/* One-time initialization for the ethernet input path: sets up the three
 * input nodes, the L3 next-by-ethertype structure, and the vlan/qinq
 * table pools. Pool index 0 of each table pool is deliberately burned so
 * that 0 can serve as the "no table" sentinel used in
 * ethernet_sw_interface_get_config. */
2051 ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2053 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2054 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2056 ethernet_setup_node (vm, ethernet_input_node.index);
2057 ethernet_setup_node (vm, ethernet_input_type_node.index);
2058 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2060 next_by_ethertype_init (&em->l3_next);
2062 // Initialize pools and vector for vlan parsing
2063 vec_validate (em->main_intfs, 10); // 10 main interfaces
2064 pool_alloc (em->vlan_pool, 10);
2065 pool_alloc (em->qinq_pool, 1);
2067 // The first vlan pool will always be reserved for an invalid table
2068 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2069 // The first qinq pool will always be reserved for an invalid table
2070 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
/* Register a node to receive packets of a given ethertype.
 * Records the node in the per-type info, adds the node as a next arc on
 * all three ethernet input nodes (asserting the next indices stay
 * aligned across them), registers the ethertype in the L3 next
 * structure, and forwards the registration to L2 BVI handling. */
2074 ethernet_register_input_type (vlib_main_t * vm,
2075 ethernet_type_t type, u32 node_index)
2077 ethernet_main_t *em = ðernet_main;
2078 ethernet_type_info_t *ti;
/* Make sure ethernet_init has run before touching its state */
2082 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2084 clib_error_report (error);
2087 ti = ethernet_get_type_info (em, type);
2090 clib_warning ("type_info NULL for type %d", type);
2093 ti->node_index = node_index;
2094 ti->next_index = vlib_node_add_next (vm,
2095 ethernet_input_node.index, node_index);
/* The same arc must land at the same next index on every input node */
2096 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2097 ASSERT (i == ti->next_index);
2099 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2100 ASSERT (i == ti->next_index);
2102 // Add the L3 node for this ethertype to the next nodes structure
2103 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2105 // Call the registration functions for other nodes that want a mapping
2106 l2bvi_register_input_type (vm, type, node_index);
/* Register the node that receives all L2-mode packets (em->l2_next).
 * The arc is added to all three input nodes so the cached l2 next index
 * is valid regardless of which input node a frame traverses. */
2110 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
2112 ethernet_main_t *em = ðernet_main;
2116 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2119 * Even if we never use these arcs, we have to align the next indices...
2121 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2123 ASSERT (i == em->l2_next);
2125 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2126 ASSERT (i == em->l2_next);
2129 // Register a next node for L3 redirect, and enable L3 redirect
/* Once enabled, the cached IP4/IP6/MPLS next indices all point at the
 * redirect node, and next_by_ethertype_register will refuse to update
 * them (see the !em->redirect_l3 guard there). */
2131 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
2133 ethernet_main_t *em = ðernet_main;
2136 em->redirect_l3 = 1;
2137 em->redirect_l3_next = vlib_node_add_next (vm,
2138 ethernet_input_node.index,
2141 * Change the cached next nodes to the redirect node
2143 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2144 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2145 em->l3_next.input_next_mpls = em->redirect_l3_next;
2148 * Even if we never use these arcs, we have to align the next indices...
2150 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2152 ASSERT (i == em->redirect_l3_next);
2154 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2156 ASSERT (i == em->redirect_l3_next);
2161 * fd.io coding-style-patch-verification: ON
2164 * eval: (c-set-style "gnu")