2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
47 #include <vnet/classify/trace_classify.h>
// X-macro table of the statically-known next nodes reachable from
// ethernet-input: _(enum-suffix, graph-node-name).  Other next indices
// (ip6, mpls, l2) are looked up at runtime through ethernet_main
// (em->l2_next, em->l3_next) rather than listed here.
49 #define foreach_ethernet_input_next \
50 _ (PUNT, "error-punt") \
51 _ (DROP, "error-drop") \
52 _ (LLC, "llc-input") \
53 _ (IP4_INPUT, "ip4-input") \
54 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
// Expand the table into ETHERNET_INPUT_NEXT_* enum members plus a count.
58 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
59 foreach_ethernet_input_next
61 ETHERNET_INPUT_N_NEXT,
62 } ethernet_input_next_t;
// Tail of the per-packet trace record captured by ethernet-input;
// frame_data holds the frame-scalar hw/sw interface indices printed by
// format_ethernet_input_trace.
68 ethernet_input_frame_t frame_data;
69 } ethernet_input_trace_t;
// Trace formatter for ethernet-input records: prints the frame flags,
// the hw/sw interface indices when the frame carried a single
// sw_if_index, then the captured ethernet header.
72 format_ethernet_input_trace (u8 * s, va_list * va)
74 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
75 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
76 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
77 u32 indent = format_get_indent (s);
81 s = format (s, "frame: flags 0x%x", t->frame_flags);
// hw/sw indices are only meaningful when the whole frame came from one
// interface, as signalled by this frame flag.
82 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
83 s = format (s, ", hw-if-index %u, sw-if-index %u",
84 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
85 s = format (s, "\n%U", format_white_space, indent);
87 s = format (s, "%U", format_ethernet_header, t->packet_data);
92 extern vlib_node_registration_t ethernet_input_node;
// Input-node variants: plain ethernet, "type" (entered after LLC/SNAP
// processing, where only the ethertype remains to be read), and a
// not-L2 variant (see parse_header / determine_next_node below).
96 ETHERNET_INPUT_VARIANT_ETHERNET,
97 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
98 ETHERNET_INPUT_VARIANT_NOT_L2,
99 } ethernet_input_variant_t;
102 // Parse the ethernet header to extract vlan tags and innermost ethertype
// Outputs (via pointers): *type = innermost ethertype (host order),
// *outer_id / *inner_id = VLAN ids (0 when absent), *match_flags =
// SUBINT_CONFIG_* tag-match flags for subinterface lookup.  Also advances
// the buffer past the parsed headers and records l2_hdr_offset.
// NOTE(review): some parameter/body lines are not visible in this chunk
// (b0 and the tag/vlan_count locals are declared in the missing lines).
103 static_always_inline void
104 parse_header (ethernet_input_variant_t variant,
108 u16 * outer_id, u16 * inner_id, u32 * match_flags)
112 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
113 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
115 ethernet_header_t *e0;
117 e0 = (void *) (b0->data + b0->current_data);
119 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
120 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
122 vlib_buffer_advance (b0, sizeof (e0[0]));
124 *type = clib_net_to_host_u16 (e0->type);
126 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
128 // here when prior node was LLC/SNAP processing
131 e0 = (void *) (b0->data + b0->current_data);
133 vlib_buffer_advance (b0, sizeof (e0[0]));
135 *type = clib_net_to_host_u16 (e0[0]);
138 // save for distinguishing between dot1q and dot1ad later
141 // default the tags to 0 (used if there is no corresponding tag)
145 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
148 // check for vlan encaps
149 if (ethernet_frame_is_tagged (*type))
151 ethernet_vlan_header_t *h0;
154 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
156 h0 = (void *) (b0->data + b0->current_data);
158 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
// low 12 bits of the TCI are the VLAN id
160 *outer_id = tag & 0xfff;
162 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
164 *type = clib_net_to_host_u16 (h0->type);
166 vlib_buffer_advance (b0, sizeof (h0[0]));
169 if (*type == ETHERNET_TYPE_VLAN)
171 // Double tagged packet
172 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
174 h0 = (void *) (b0->data + b0->current_data);
176 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
178 *inner_id = tag & 0xfff;
180 *type = clib_net_to_host_u16 (h0->type);
182 vlib_buffer_advance (b0, sizeof (h0[0]));
184 if (*type == ETHERNET_TYPE_VLAN)
186 // More than double tagged packet
187 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
189 vlib_buffer_advance (b0, sizeof (h0[0]));
190 vlan_count = 3; // "unknown" number, aka, 3-or-more
// record how many VLAN tags were seen in the buffer metadata
194 ethernet_buffer_set_vlan_count (b0, vlan_count);
197 // Determine the subinterface for this packet, given the result of the
198 // vlan table lookups and vlan header parsing. Check the most specific
// Outputs: *new_sw_if_index (~0 when nothing matched -> DOWN error),
// *error0 and *is_l2 for the matched subinterface.
200 static_always_inline void
201 identify_subint (vnet_hw_interface_t * hi,
204 main_intf_t * main_intf,
205 vlan_intf_t * vlan_intf,
206 qinq_intf_t * qinq_intf,
207 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
211 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
212 qinq_intf, new_sw_if_index, error0, is_l2);
217 // Perform L3 my-mac filter
218 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
219 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
222 ethernet_header_t *e0;
223 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
// only unicast dmacs are filtered; multicast/broadcast pass through
225 if (!(ethernet_address_cast (e0->dst_address)))
227 if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
229 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
234 // Check for down subinterface
235 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
// Choose the next graph node for a parsed packet.  On error -> drop.
// L2 path: rewind the buffer so the L2 header is preserved and record
// l2_len.  L3 path: dispatch on ethertype — fast checks for ip4/ip6/mpls,
// then the optional L3 redirect hook, then the sparse ethertype table.
239 static_always_inline void
240 determine_next_node (ethernet_main_t * em,
241 ethernet_input_variant_t variant,
243 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
245 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
246 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
248 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
250 // some error occurred
251 *next0 = ETHERNET_INPUT_NEXT_DROP;
255 // record the L2 len and reset the buffer so the L2 header is preserved
256 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
257 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
258 *next0 = em->l2_next;
259 ASSERT (vnet_buffer (b0)->l2.l2_len ==
260 ethernet_buffer_header_size (b0));
261 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
263 // check for common IP/MPLS ethertypes
265 else if (type0 == ETHERNET_TYPE_IP4)
267 *next0 = em->l3_next.input_next_ip4;
269 else if (type0 == ETHERNET_TYPE_IP6)
271 *next0 = em->l3_next.input_next_ip6;
273 else if (type0 == ETHERNET_TYPE_MPLS)
275 *next0 = em->l3_next.input_next_mpls;
278 else if (em->redirect_l3)
280 // L3 Redirect is on, the cached common next nodes will be
281 // pointing to the redirect node, catch the uncommon types here
282 *next0 = em->redirect_l3_next;
286 // uncommon ethertype, check table
288 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
289 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
292 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
294 // The table is not populated with LLC values, so check that now.
295 // If variant is variant_ethernet then we came from LLC processing. Don't
296 // go back there; drop instead by keeping the drop/bad table result.
297 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
299 *next0 = ETHERNET_INPUT_NEXT_LLC;
305 /* following vector code relies on following assumptions */
// eth_input_adv_and_flags_x4 gathers/scatters the first 64 bits of each
// buffer and stores l2/l3_hdr_offset with a single 32-bit write, so the
// field layout is pinned here at compile time.
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
308 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
309 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
310 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
311 "l3_hdr_offset must follow l2_hdr_offset");
/* Update metadata for 4 untagged buffers at once: set l2/l3_hdr_offset
   and the corresponding validity flags; in L3 mode also advance
   current_data/current_length past the ethernet header, in L2 mode
   leave the buffer at the header and set l2.l2_len instead. */
313 static_always_inline void
314 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
316 i16 adv = sizeof (ethernet_header_t);
317 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
318 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
320 #ifdef CLIB_HAVE_VEC256
321 /* to reduce number of small loads/stores we are loading first 64 bits
322 of each buffer metadata into 256-bit register so we can advance
323 current_data, current_length and flags.
324 Observed saving of this code is ~2 clocks per packet */
327 /* vector of signed 16 bit integers used in signed vector add operation
328 to advance current_data and current_length */
329 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
331 adv, -adv, 0, 0, adv, -adv, 0, 0,
332 adv, -adv, 0, 0, adv, -adv, 0, 0
335 /* load 4 x 64 bits */
336 r = u64x4_gather (b[0], b[1], b[2], b[3]);
342 radv = (u64x4) ((i16x16) r + adv4);
344 /* write 4 x 64 bits */
345 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
347 /* use old current_data as l2_hdr_offset and new current_data as
349 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
351 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
352 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
355 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
// sanity: in L3 mode current_data was advanced past the header ...
359 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
360 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
361 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
362 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
364 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
365 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
366 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
367 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
// ... while in L2 mode it still points at the ethernet header
371 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
372 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
373 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
374 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
376 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
377 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
378 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
379 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
// scalar fallback when no 256-bit vector unit is available
383 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
384 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
385 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
386 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
387 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
388 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
389 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
390 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
394 vlib_buffer_advance (b[0], adv);
395 vlib_buffer_advance (b[1], adv);
396 vlib_buffer_advance (b[2], adv);
397 vlib_buffer_advance (b[3], adv);
400 b[0]->flags |= flags;
401 b[1]->flags |= flags;
402 b[2]->flags |= flags;
403 b[3]->flags |= flags;
408 vnet_buffer (b[0])->l2.l2_len = adv;
409 vnet_buffer (b[1])->l2.l2_len = adv;
410 vnet_buffer (b[2])->l2.l2_len = adv;
411 vnet_buffer (b[3])->l2.l2_len = adv;
/* Single-buffer variant of eth_input_adv_and_flags_x4: set header
   offsets and flags, advance past the ethernet header in L3 mode,
   record l2.l2_len in L2 mode. */
415 static_always_inline void
416 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
418 i16 adv = sizeof (ethernet_header_t);
419 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
420 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
422 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
423 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
426 vlib_buffer_advance (b[0], adv);
427 b[0]->flags |= flags;
429 vnet_buffer (b[0])->l2.l2_len = adv;
/* Load per-packet parse inputs for buffer b[offset] into the frame-wide
   arrays: etype[] gets the ethertype in network byte order, tags[] the
   8 bytes following the ethernet header (candidate VLAN tags), and,
   when dmac_check is set, dmacs[] the first 8 bytes of the header
   (destination MAC plus 2 bytes of source MAC, masked later). */
433 static_always_inline void
434 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
435 u64 * dmacs, int offset, int dmac_check)
437 ethernet_header_t *e;
438 e = vlib_buffer_get_current (b[offset]);
439 #ifdef CLIB_HAVE_VEC128
// single 16-byte load covering dmac/smac/type; lane 3 is the ethertype
440 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
441 etype[offset] = ((u16x8) r)[3];
444 etype[offset] = e->type;
445 tags[offset] = *(u64 *) (e + 1);
449 dmacs[offset] = *(u64 *) e;
/* Map an ethertype (host byte order) to a next-node index: values below
   0x600 are 802.3 lengths, not types, and go to llc-input; everything
   else is resolved through the registered sparse ethertype table. */
452 static_always_inline u16
453 eth_input_next_by_type (u16 etype)
455 ethernet_main_t *em = &ethernet_main;
457 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
458 vec_elt (em->l3_next.input_next_by_type,
459 sparse_vec_index (em->l3_next.input_next_by_type, etype));
// Tail of the cached tag-lookup state used by eth_input_tag_lookup:
// packet/byte counts accumulated for the currently-matched subinterface,
// flushed by eth_input_update_if_counters.
469 u64 n_packets, n_bytes;
470 } eth_input_tag_lookup_t;
/* Flush the rx counters accumulated in tag-lookup state l to the
   per-sw-interface combined counters; no-op when nothing was counted
   or no subinterface ever matched (sw_if_index == ~0). */
472 static_always_inline void
473 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
474 eth_input_tag_lookup_t * l)
476 if (l->n_packets == 0 || l->sw_if_index == ~0)
// n_bytes accumulates payload length; add back l->len header bytes/pkt
480 l->n_bytes += l->n_packets * l->len;
482 vlib_increment_combined_counter
483 (vnm->interface_main.combined_sw_if_counters +
484 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
485 l->n_packets, l->n_bytes);
/* Slow path for a VLAN-tagged packet: identify the subinterface for the
   tag stack in 'tag', choosing the dot1q or dot1ad table per is_dot1ad.
   The result (sw_if_index, next node, error, buffer advance) is cached
   in *l keyed by the tag bytes under l->mask, so runs of packets with
   identical tags skip the table lookups.  Also applies the L3 my-mac
   filter (dmac_bad) and accumulates per-subinterface rx counters. */
488 static_always_inline void
489 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
490 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
491 u64 tag, u16 * next, vlib_buffer_t * b,
492 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
493 int main_is_l3, int check_dmac)
495 ethernet_main_t *em = &ethernet_main;
/* cache miss: the tag bytes differ from the cached ones under the mask */
497 if ((tag ^ l->tag) & l->mask)
499 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
502 vlan_table_t *vlan_table;
503 qinq_table_t *qinq_table;
504 u16 *t = (u16 *) & tag;
505 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
506 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
507 u32 matched, is_l2, new_sw_if_index;
509 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
510 mif->dot1ad_vlans : mif->dot1q_vlans);
511 vif = &vlan_table->vlans[vlan1];
512 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
513 qif = &qinq_table->vlans[vlan2];
514 l->err = ETHERNET_ERROR_NONE;
515 l->type = clib_net_to_host_u16 (t[1]);
/* inner tag present: double-tagged lookup */
517 if (l->type == ETHERNET_TYPE_VLAN)
519 l->type = clib_net_to_host_u16 (t[3]);
521 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
522 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
523 qif, &new_sw_if_index, &l->err,
531 new_sw_if_index = hi->sw_if_index;
532 l->err = ETHERNET_ERROR_NONE;
534 is_l2 = main_is_l3 == 0;
537 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
538 SUBINT_CONFIG_MATCH_1_TAG, mif,
539 vif, qif, &new_sw_if_index,
/* flush counters when the matched subinterface changes */
543 if (l->sw_if_index != new_sw_if_index)
545 eth_input_update_if_counters (vm, vnm, l);
548 l->sw_if_index = new_sw_if_index;
/* cache-key mask: compare all 8 tag bytes for 2 tags, first 4 otherwise */
551 l->mask = (l->n_tags == 2) ?
552 clib_net_to_host_u64 (0xffffffffffffffff) :
553 clib_net_to_host_u64 (0xffffffff00000000);
555 if (matched && l->sw_if_index == ~0)
556 l->err = ETHERNET_ERROR_DOWN;
558 l->len = sizeof (ethernet_header_t) +
559 l->n_tags * sizeof (ethernet_vlan_header_t);
/* L2: rewind to the ethernet header; L3: skip past the vlan tags */
561 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
562 l->n_tags * sizeof (ethernet_vlan_header_t);
564 l->adv = is_l2 ? 0 : l->len;
566 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
567 l->next = ETHERNET_INPUT_NEXT_DROP;
569 l->next = em->l2_next;
570 else if (l->type == ETHERNET_TYPE_IP4)
571 l->next = em->l3_next.input_next_ip4;
572 else if (l->type == ETHERNET_TYPE_IP6)
573 l->next = em->l3_next.input_next_ip6;
574 else if (l->type == ETHERNET_TYPE_MPLS)
575 l->next = em->l3_next.input_next_mpls;
576 else if (em->redirect_l3)
577 l->next = em->redirect_l3_next;
580 l->next = eth_input_next_by_type (l->type);
581 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
582 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
/* my-mac filter applies only on L3 paths (positive buffer advance) */
586 if (check_dmac && l->adv > 0 && dmac_bad)
588 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
589 next[0] = ETHERNET_INPUT_NEXT_PUNT;
594 vlib_buffer_advance (b, l->adv);
595 vnet_buffer (b)->l2.l2_len = l->len;
596 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
598 if (l->err == ETHERNET_ERROR_NONE)
600 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
601 ethernet_buffer_set_vlan_count (b, l->n_tags);
604 b->error = node->errors[l->err];
606 /* update counters */
608 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
/* For each packet, set dmacs_bad[i] nonzero when its destination MAC is
   unicast (I/G bit clear) and does not equal the interface MAC.  The
   dmacs[] entries hold the first 8 header bytes; 'mask' keeps only the
   6 MAC bytes and 'igbit' isolates the I/G (multicast) bit, both in
   network byte order. */
611 static_always_inline void
612 eth_input_process_frame_dmac_check (vnet_hw_interface_t * hi,
613 u64 * dmacs, u8 * dmacs_bad,
616 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
617 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
618 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
620 u8 *dmac_bad = dmacs_bad;
622 i32 n_left = n_packets;
624 #ifdef CLIB_HAVE_VEC256
// vectorized: 8 packets per iteration, flags extracted via msb mask
625 u64x4 igbit4 = u64x4_splat (igbit);
626 u64x4 mask4 = u64x4_splat (mask);
627 u64x4 hwaddr4 = u64x4_splat (hwaddr);
631 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
632 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
634 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
635 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
637 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
638 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
// scalar fallback: same predicate, 4 packets per iteration
655 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
656 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
657 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
658 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
673 /* process frame of buffers, store ethertype into array and update
674 buffer metadata fields depending on interface being l2 or l3 assuming that
675 packets are untagged. For tagged packets those fields are updated later.
676 Optionally store Destination MAC address and tag data into arrays
677 for further processing */
// the vectorized loops below consume packets 8 (and 16) at a time
679 STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
680 "VLIB_FRAME_SIZE must be a multiple of 8");
681 static_always_inline void
682 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
683 vnet_hw_interface_t * hi,
684 u32 * buffer_indices, u32 n_packets, int main_is_l3,
685 int ip4_cksum_ok, int dmac_check)
687 ethernet_main_t *em = ðernet_main;
688 u16 nexts[VLIB_FRAME_SIZE], *next;
689 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
690 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
691 u8 dmacs_bad[VLIB_FRAME_SIZE];
692 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
693 u16 slowpath_indices[VLIB_FRAME_SIZE];
695 u16 next_ip4, next_ip6, next_mpls, next_l2;
696 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
697 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
698 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
699 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
700 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
701 i32 n_left = n_packets;
702 vlib_buffer_t *b[20];
705 from = buffer_indices;
709 vlib_buffer_t **ph = b + 16, **pd = b + 8;
710 vlib_get_buffers (vm, from, b, 4);
711 vlib_get_buffers (vm, from + 8, pd, 4);
712 vlib_get_buffers (vm, from + 16, ph, 4);
714 vlib_prefetch_buffer_header (ph[0], LOAD);
715 vlib_prefetch_buffer_data (pd[0], LOAD);
716 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
718 vlib_prefetch_buffer_header (ph[1], LOAD);
719 vlib_prefetch_buffer_data (pd[1], LOAD);
720 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
722 vlib_prefetch_buffer_header (ph[2], LOAD);
723 vlib_prefetch_buffer_data (pd[2], LOAD);
724 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
726 vlib_prefetch_buffer_header (ph[3], LOAD);
727 vlib_prefetch_buffer_data (pd[3], LOAD);
728 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
730 eth_input_adv_and_flags_x4 (b, main_is_l3);
741 vlib_get_buffers (vm, from, b, 4);
742 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
743 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
744 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
745 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
746 eth_input_adv_and_flags_x4 (b, main_is_l3);
757 vlib_get_buffers (vm, from, b, 1);
758 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
759 eth_input_adv_and_flags_x1 (b, main_is_l3);
770 eth_input_process_frame_dmac_check (hi, dmacs, dmacs_bad, n_packets);
772 next_ip4 = em->l3_next.input_next_ip4;
773 next_ip6 = em->l3_next.input_next_ip6;
774 next_mpls = em->l3_next.input_next_mpls;
775 next_l2 = em->l2_next;
777 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
778 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
780 #ifdef CLIB_HAVE_VEC256
781 u16x16 et16_ip4 = u16x16_splat (et_ip4);
782 u16x16 et16_ip6 = u16x16_splat (et_ip6);
783 u16x16 et16_mpls = u16x16_splat (et_mpls);
784 u16x16 et16_vlan = u16x16_splat (et_vlan);
785 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
786 u16x16 next16_ip4 = u16x16_splat (next_ip4);
787 u16x16 next16_ip6 = u16x16_splat (next_ip6);
788 u16x16 next16_mpls = u16x16_splat (next_mpls);
789 u16x16 next16_l2 = u16x16_splat (next_l2);
791 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
800 /* fastpath - in l3 mode hadles ip4, ip6 and mpls packets, other packets
801 are considered as slowpath, in l2 mode all untagged packets are
802 considered as fastpath */
805 #ifdef CLIB_HAVE_VEC256
809 u16x16 e16 = u16x16_load_unaligned (etype);
812 r += (e16 == et16_ip4) & next16_ip4;
813 r += (e16 == et16_ip6) & next16_ip6;
814 r += (e16 == et16_mpls) & next16_mpls;
817 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
818 u16x16_store_unaligned (r, next);
820 if (!u16x16_is_all_zero (r == zero))
822 if (u16x16_is_all_zero (r))
824 u16x16_store_unaligned (u16x16_splat (i) + stairs,
825 slowpath_indices + n_slowpath);
830 for (int j = 0; j < 16; j++)
832 slowpath_indices[n_slowpath++] = i + j;
843 if (main_is_l3 && etype[0] == et_ip4)
845 else if (main_is_l3 && etype[0] == et_ip6)
847 else if (main_is_l3 && etype[0] == et_mpls)
849 else if (main_is_l3 == 0 &&
850 etype[0] != et_vlan && etype[0] != et_dot1ad)
855 slowpath_indices[n_slowpath++] = i;
866 vnet_main_t *vnm = vnet_get_main ();
868 u16 *si = slowpath_indices;
869 u32 last_unknown_etype = ~0;
870 u32 last_unknown_next = ~0;
871 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
873 .tag = tags[si[0]] ^ -1LL,
877 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
882 u16 etype = etypes[i];
884 if (etype == et_vlan)
886 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
887 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
888 &dot1q_lookup, dmacs_bad[i], 0,
889 main_is_l3, dmac_check);
892 else if (etype == et_dot1ad)
894 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
895 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
896 &dot1ad_lookup, dmacs_bad[i], 1,
897 main_is_l3, dmac_check);
901 /* untagged packet with not well known etyertype */
902 if (last_unknown_etype != etype)
904 last_unknown_etype = etype;
905 etype = clib_host_to_net_u16 (etype);
906 last_unknown_next = eth_input_next_by_type (etype);
908 if (dmac_check && main_is_l3 && dmacs_bad[i])
910 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
911 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
912 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
915 nexts[i] = last_unknown_next;
923 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
924 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
927 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
/* Entry point for a frame known to come from a single hw interface:
   inspect the untagged-subinterface config and interface flags to decide
   whether the main interface is L2 or L3 and whether the DMAC filter is
   needed, then dispatch to the matching eth_input_process_frame
   specialization. */
930 static_always_inline void
931 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
932 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
935 ethernet_main_t *em = &ethernet_main;
936 ethernet_interface_t *ei;
937 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
938 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
939 subint_config_t *subint0 = &intf0->untagged_subint;
941 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
942 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
946 /* main interface is L3, we don't expect tagged packets and interface
947 is not in promisc mode, so we don't need to check DMAC */
951 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
954 /* subinterfaces and promisc mode so DMAC check is needed */
955 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
961 /* untagged packets are treated as L2 */
963 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
/* Per-frame tracing and rx pcap capture.  When node tracing is on,
   record a trace entry (packet bytes + frame metadata) for each traced
   buffer; when rx pcap capture is enabled, add matching packets to the
   pcap file, optionally filtered by a classify table and/or sw_if_index. */
969 static_always_inline void
970 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
971 vlib_frame_t * from_frame)
974 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
976 from = vlib_frame_vector_args (from_frame);
977 n_left = from_frame->n_vectors;
981 ethernet_input_trace_t *t0;
982 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
984 if (b0->flags & VLIB_BUFFER_IS_TRACED)
986 t0 = vlib_add_trace (vm, node, b0,
987 sizeof (ethernet_input_trace_t));
988 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
989 sizeof (t0->packet_data));
990 t0->frame_flags = from_frame->flags;
991 clib_memcpy_fast (&t0->frame_data,
992 vlib_frame_scalar_args (from_frame),
993 sizeof (ethernet_input_frame_t));
1000 /* rx pcap capture if enabled */
1001 if (PREDICT_FALSE (vlib_global_main.pcap.pcap_rx_enable))
1004 vnet_pcap_t *pp = &vlib_global_main.pcap;
1006 from = vlib_frame_vector_args (from_frame);
1007 n_left = from_frame->n_vectors;
1010 int classify_filter_result;
1015 b0 = vlib_get_buffer (vm, bi0);
// a configured classify table acts as the capture filter
1016 if (pp->filter_classify_table_index != ~0)
1018 classify_filter_result =
1019 vnet_is_packet_traced_inline
1020 (b0, pp->filter_classify_table_index, 0 /* full classify */ );
1021 if (classify_filter_result)
1022 pcap_add_buffer (&pp->pcap_main, vm, bi0,
1023 pp->max_bytes_per_pkt);
// otherwise capture all packets, or only those from pcap_sw_if_index
1027 if (pp->pcap_sw_if_index == 0 ||
1028 pp->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_RX])
1030 pcap_add_buffer (&pp->pcap_main, vm, bi0,
1031 pp->max_bytes_per_pkt);
1037 static_always_inline void
1038 ethernet_input_inline (vlib_main_t * vm,
1039 vlib_node_runtime_t * node,
1040 u32 * from, u32 n_packets,
1041 ethernet_input_variant_t variant)
1043 vnet_main_t *vnm = vnet_get_main ();
1044 ethernet_main_t *em = ðernet_main;
1045 vlib_node_runtime_t *error_node;
1046 u32 n_left_from, next_index, *to_next;
1047 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1048 u32 thread_index = vm->thread_index;
1049 u32 cached_sw_if_index = ~0;
1050 u32 cached_is_l2 = 0; /* shut up gcc */
1051 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1052 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
1053 vlib_buffer_t **b = bufs;
1055 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1056 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1060 n_left_from = n_packets;
1062 next_index = node->cached_next_index;
1063 stats_sw_if_index = node->runtime_data[0];
1064 stats_n_packets = stats_n_bytes = 0;
1065 vlib_get_buffers (vm, from, bufs, n_left_from);
1067 while (n_left_from > 0)
1071 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1073 while (n_left_from >= 4 && n_left_to_next >= 2)
1076 vlib_buffer_t *b0, *b1;
1077 u8 next0, next1, error0, error1;
1078 u16 type0, orig_type0, type1, orig_type1;
1079 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1080 u32 match_flags0, match_flags1;
1081 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1082 new_sw_if_index1, len1;
1083 vnet_hw_interface_t *hi0, *hi1;
1084 main_intf_t *main_intf0, *main_intf1;
1085 vlan_intf_t *vlan_intf0, *vlan_intf1;
1086 qinq_intf_t *qinq_intf0, *qinq_intf1;
1088 ethernet_header_t *e0, *e1;
1090 /* Prefetch next iteration. */
1092 vlib_prefetch_buffer_header (b[2], STORE);
1093 vlib_prefetch_buffer_header (b[3], STORE);
1095 CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
1096 CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
1105 n_left_to_next -= 2;
1112 error0 = error1 = ETHERNET_ERROR_NONE;
1113 e0 = vlib_buffer_get_current (b0);
1114 type0 = clib_net_to_host_u16 (e0->type);
1115 e1 = vlib_buffer_get_current (b1);
1116 type1 = clib_net_to_host_u16 (e1->type);
1118 /* Set the L2 header offset for all packets */
1119 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1120 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1121 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1122 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1124 /* Speed-path for the untagged case */
1125 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1126 && !ethernet_frame_is_any_tagged_x2 (type0,
1130 subint_config_t *subint0;
1131 u32 sw_if_index0, sw_if_index1;
1133 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1134 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1135 is_l20 = cached_is_l2;
1137 /* This is probably wholly unnecessary */
1138 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1141 /* Now sw_if_index0 == sw_if_index1 */
1142 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1144 cached_sw_if_index = sw_if_index0;
1145 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1146 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1147 subint0 = &intf0->untagged_subint;
1148 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1151 if (PREDICT_TRUE (is_l20 != 0))
1153 vnet_buffer (b0)->l3_hdr_offset =
1154 vnet_buffer (b0)->l2_hdr_offset +
1155 sizeof (ethernet_header_t);
1156 vnet_buffer (b1)->l3_hdr_offset =
1157 vnet_buffer (b1)->l2_hdr_offset +
1158 sizeof (ethernet_header_t);
1159 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1160 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1161 next0 = em->l2_next;
1162 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1163 next1 = em->l2_next;
1164 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1168 if (!ethernet_address_cast (e0->dst_address) &&
1169 (hi->hw_address != 0) &&
1170 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1171 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1172 if (!ethernet_address_cast (e1->dst_address) &&
1173 (hi->hw_address != 0) &&
1174 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1175 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1176 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1177 determine_next_node (em, variant, 0, type0, b0,
1179 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1180 determine_next_node (em, variant, 0, type1, b1,
1186 /* Slow-path for the tagged case */
1188 parse_header (variant,
1191 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1193 parse_header (variant,
1196 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1198 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1199 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1201 eth_vlan_table_lookups (em,
1208 &main_intf0, &vlan_intf0, &qinq_intf0);
1210 eth_vlan_table_lookups (em,
1217 &main_intf1, &vlan_intf1, &qinq_intf1);
1219 identify_subint (hi0,
1224 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1226 identify_subint (hi1,
1231 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1233 // Save RX sw_if_index for later nodes
1234 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1236 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1237 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1239 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1241 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1242 if (((new_sw_if_index0 != ~0)
1243 && (new_sw_if_index0 != old_sw_if_index0))
1244 || ((new_sw_if_index1 != ~0)
1245 && (new_sw_if_index1 != old_sw_if_index1)))
1248 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1249 - vnet_buffer (b0)->l2_hdr_offset;
1250 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1251 - vnet_buffer (b1)->l2_hdr_offset;
1253 stats_n_packets += 2;
1254 stats_n_bytes += len0 + len1;
1257 (!(new_sw_if_index0 == stats_sw_if_index
1258 && new_sw_if_index1 == stats_sw_if_index)))
1260 stats_n_packets -= 2;
1261 stats_n_bytes -= len0 + len1;
1263 if (new_sw_if_index0 != old_sw_if_index0
1264 && new_sw_if_index0 != ~0)
1265 vlib_increment_combined_counter (vnm->
1266 interface_main.combined_sw_if_counters
1268 VNET_INTERFACE_COUNTER_RX,
1270 new_sw_if_index0, 1,
1272 if (new_sw_if_index1 != old_sw_if_index1
1273 && new_sw_if_index1 != ~0)
1274 vlib_increment_combined_counter (vnm->
1275 interface_main.combined_sw_if_counters
1277 VNET_INTERFACE_COUNTER_RX,
1279 new_sw_if_index1, 1,
1282 if (new_sw_if_index0 == new_sw_if_index1)
1284 if (stats_n_packets > 0)
1286 vlib_increment_combined_counter
1287 (vnm->interface_main.combined_sw_if_counters
1288 + VNET_INTERFACE_COUNTER_RX,
1291 stats_n_packets, stats_n_bytes);
1292 stats_n_packets = stats_n_bytes = 0;
1294 stats_sw_if_index = new_sw_if_index0;
1299 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1300 is_l20 = is_l21 = 0;
1302 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1304 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1308 b0->error = error_node->errors[error0];
1309 b1->error = error_node->errors[error1];
1311 // verify speculative enqueue
1312 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1313 n_left_to_next, bi0, bi1, next0,
1317 while (n_left_from > 0 && n_left_to_next > 0)
1322 u16 type0, orig_type0;
1323 u16 outer_id0, inner_id0;
1325 u32 old_sw_if_index0, new_sw_if_index0, len0;
1326 vnet_hw_interface_t *hi0;
1327 main_intf_t *main_intf0;
1328 vlan_intf_t *vlan_intf0;
1329 qinq_intf_t *qinq_intf0;
1330 ethernet_header_t *e0;
1333 // Prefetch next iteration
1334 if (n_left_from > 1)
1336 vlib_prefetch_buffer_header (b[1], STORE);
1337 CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1345 n_left_to_next -= 1;
1350 error0 = ETHERNET_ERROR_NONE;
1351 e0 = vlib_buffer_get_current (b0);
1352 type0 = clib_net_to_host_u16 (e0->type);
1354 /* Set the L2 header offset for all packets */
1355 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1356 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1358 /* Speed-path for the untagged case */
1359 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1360 && !ethernet_frame_is_tagged (type0)))
1363 subint_config_t *subint0;
1366 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1367 is_l20 = cached_is_l2;
1369 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1371 cached_sw_if_index = sw_if_index0;
1372 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1373 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1374 subint0 = &intf0->untagged_subint;
1375 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1379 if (PREDICT_TRUE (is_l20 != 0))
1381 vnet_buffer (b0)->l3_hdr_offset =
1382 vnet_buffer (b0)->l2_hdr_offset +
1383 sizeof (ethernet_header_t);
1384 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1385 next0 = em->l2_next;
1386 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1390 if (!ethernet_address_cast (e0->dst_address) &&
1391 (hi->hw_address != 0) &&
1392 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1393 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1394 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1395 determine_next_node (em, variant, 0, type0, b0,
1401 /* Slow-path for the tagged case */
1402 parse_header (variant,
1405 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1407 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1409 eth_vlan_table_lookups (em,
1416 &main_intf0, &vlan_intf0, &qinq_intf0);
1418 identify_subint (hi0,
1423 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1425 // Save RX sw_if_index for later nodes
1426 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1428 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1430 // Increment subinterface stats
1431 // Note that interface-level counters have already been incremented
1432 // prior to calling this function. Thus only subinterface counters
1433 // are incremented here.
1435 // Interface level counters include packets received on the main
1436 // interface and all subinterfaces. Subinterface level counters
1437 // include only those packets received on that subinterface
1438 // Increment stats if the subint is valid and it is not the main intf
1439 if ((new_sw_if_index0 != ~0)
1440 && (new_sw_if_index0 != old_sw_if_index0))
1443 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1444 - vnet_buffer (b0)->l2_hdr_offset;
1446 stats_n_packets += 1;
1447 stats_n_bytes += len0;
1449 // Batch stat increments from the same subinterface so counters
1450 // don't need to be incremented for every packet.
1451 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1453 stats_n_packets -= 1;
1454 stats_n_bytes -= len0;
1456 if (new_sw_if_index0 != ~0)
1457 vlib_increment_combined_counter
1458 (vnm->interface_main.combined_sw_if_counters
1459 + VNET_INTERFACE_COUNTER_RX,
1460 thread_index, new_sw_if_index0, 1, len0);
1461 if (stats_n_packets > 0)
1463 vlib_increment_combined_counter
1464 (vnm->interface_main.combined_sw_if_counters
1465 + VNET_INTERFACE_COUNTER_RX,
1467 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1468 stats_n_packets = stats_n_bytes = 0;
1470 stats_sw_if_index = new_sw_if_index0;
1474 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1477 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1481 b0->error = error_node->errors[error0];
1483 // verify speculative enqueue
1484 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1485 to_next, n_left_to_next,
1489 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1492 // Increment any remaining batched stats
1493 if (stats_n_packets > 0)
1495 vlib_increment_combined_counter
1496 (vnm->interface_main.combined_sw_if_counters
1497 + VNET_INTERFACE_COUNTER_RX,
1498 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1499 node->runtime_data[0] = stats_sw_if_index;
1503 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1504 vlib_node_runtime_t * node,
1505 vlib_frame_t * frame)
1507 vnet_main_t *vnm = vnet_get_main ();
1508 u32 *from = vlib_frame_vector_args (frame);
1509 u32 n_packets = frame->n_vectors;
1511 ethernet_input_trace (vm, node, frame);
1513 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
1515 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1516 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1517 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1518 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
1521 ethernet_input_inline (vm, node, from, n_packets,
1522 ETHERNET_INPUT_VARIANT_ETHERNET);
1526 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1527 vlib_node_runtime_t * node,
1528 vlib_frame_t * from_frame)
1530 u32 *from = vlib_frame_vector_args (from_frame);
1531 u32 n_packets = from_frame->n_vectors;
1532 ethernet_input_trace (vm, node, from_frame);
1533 ethernet_input_inline (vm, node, from, n_packets,
1534 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
1538 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1539 vlib_node_runtime_t * node,
1540 vlib_frame_t * from_frame)
1542 u32 *from = vlib_frame_vector_args (from_frame);
1543 u32 n_packets = from_frame->n_vectors;
1544 ethernet_input_trace (vm, node, from_frame);
1545 ethernet_input_inline (vm, node, from, n_packets,
1546 ETHERNET_INPUT_VARIANT_NOT_L2);
1551 // Return the subinterface config struct for the given sw_if_index
1552 // Also return via parameter the appropriate match flags for the
1553 // configured number of tags.
1554 // On error (unsupported or not ethernet) return 0.
// NOTE(review): this extract is missing interleaved lines (braces, else
// arms, the `done:` label); surviving text is kept verbatim below, and
// `&ethernet_main` appears entity-mangled as "ðernet_main".
1555 static subint_config_t *
1556 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1558 u32 * flags, u32 * unsupported)
1560 ethernet_main_t *em = ðernet_main;
1561 vnet_hw_interface_t *hi;
1562 vnet_sw_interface_t *si;
1563 main_intf_t *main_intf;
1564 vlan_table_t *vlan_table;
1565 qinq_table_t *qinq_table;
1566 subint_config_t *subint = 0;
// Resolve the supervising hardware interface; bail out for anything
// that is not an ethernet-class interface.
1568 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1570 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1573 goto done; // non-ethernet interface
1576 // ensure there's an entry for the main intf (shouldn't really be necessary)
1577 vec_validate (em->main_intfs, hi->hw_if_index);
1578 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1580 // Locate the subint for the given ethernet config
1581 si = vnet_get_sw_interface (vnm, sw_if_index);
// P2P subinterfaces get a pooled config, allocated on first lookup miss.
1583 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1585 p2p_ethernet_main_t *p2pm = &p2p_main;
1586 u32 p2pe_sw_if_index =
1587 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1588 if (p2pe_sw_if_index == ~0)
1590 pool_get (p2pm->p2p_subif_pool, subint);
1591 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1594 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1595 *flags = SUBINT_CONFIG_P2P;
// Pipe interfaces carry their subint config inside the pipe object.
1597 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1601 pipe = pipe_get (sw_if_index);
1602 subint = &pipe->subint;
1603 *flags = SUBINT_CONFIG_P2P;
// The "default" subinterface catches any tagged packet that no more
// specific subinterface matched.
1605 else if (si->sub.eth.flags.default_sub)
1607 subint = &main_intf->default_subint;
1608 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1609 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1611 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1613 // if no flags are set then this is a main interface
1614 // so treat as untagged
1615 subint = &main_intf->untagged_subint;
1616 *flags = SUBINT_CONFIG_MATCH_0_TAG;
// Tagged case: pick the dot1ad or dot1q vlan table, allocating it from
// the pool on first use (table index 0 is reserved as "invalid").
1621 // first get the vlan table
1622 if (si->sub.eth.flags.dot1ad)
1624 if (main_intf->dot1ad_vlans == 0)
1626 // Allocate a vlan table from the pool
1627 pool_get (em->vlan_pool, vlan_table);
1628 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1632 // Get ptr to existing vlan table
1634 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1639 if (main_intf->dot1q_vlans == 0)
1641 // Allocate a vlan table from the pool
1642 pool_get (em->vlan_pool, vlan_table);
1643 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1647 // Get ptr to existing vlan table
1649 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
// Single-tag subinterface: exact-match restricts to exactly one tag,
// otherwise 2- and 3-tag packets may also match.
1653 if (si->sub.eth.flags.one_tag)
1655 *flags = si->sub.eth.flags.exact_match ?
1656 SUBINT_CONFIG_MATCH_1_TAG :
1657 (SUBINT_CONFIG_MATCH_1_TAG |
1658 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1660 if (si->sub.eth.flags.outer_vlan_id_any)
1662 // not implemented yet
1668 // a single vlan, a common case
1670 &vlan_table->vlans[si->sub.eth.
1671 outer_vlan_id].single_tag_subint;
// Two-tag subinterface (qinq) cases follow.
1678 *flags = si->sub.eth.flags.exact_match ?
1679 SUBINT_CONFIG_MATCH_2_TAG :
1680 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1682 if (si->sub.eth.flags.outer_vlan_id_any
1683 && si->sub.eth.flags.inner_vlan_id_any)
1685 // not implemented yet
1690 if (si->sub.eth.flags.inner_vlan_id_any)
1692 // a specific outer and "any" inner
1693 // don't need a qinq table for this
1695 &vlan_table->vlans[si->sub.eth.
1696 outer_vlan_id].inner_any_subint;
1697 if (si->sub.eth.flags.exact_match)
1699 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1703 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1704 SUBINT_CONFIG_MATCH_3_TAG;
1709 // a specific outer + specifc innner vlan id, a common case
1711 // get the qinq table
1712 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1714 // Allocate a qinq table from the pool
1715 pool_get (em->qinq_pool, qinq_table);
1716 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1717 qinq_table - em->qinq_pool;
1721 // Get ptr to existing qinq table
1723 vec_elt_at_index (em->qinq_pool,
1724 vlan_table->vlans[si->sub.
1728 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
// Admin up/down callback: record the sw_if_index in the subinterface
// config while the interface is up, ~0 while it is down.  Non-ethernet
// or not-yet-implemented subinterfaces are silently skipped.
// NOTE(review): several lines (declarations, the subint==0 guard and the
// `done:` label) are elided in this extract; surviving text kept verbatim.
1737 static clib_error_t *
1738 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1740 subint_config_t *subint;
1743 clib_error_t *error = 0;
1745 // Find the config for this subinterface
1747 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1752 // not implemented yet or not ethernet
// While admin-up, later lookups see the real sw_if_index; otherwise ~0
// marks the subinterface as down.
1756 subint->sw_if_index =
1757 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
// Register the callback with the interface infra.
1763 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1766 #ifndef CLIB_MARCH_VARIANT
1767 // Set the L2/L3 mode for the subinterface
// Sets or clears SUBINT_CONFIG_L2 on the subinterface config.  For L2
// mode all tag-match flags are enabled; leaving L2 restores the default
// (L3) match flags.  NOTE(review): branch/brace lines are elided in this
// extract; surviving text kept verbatim.
1769 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1771 subint_config_t *subint;
// is_port distinguishes a main interface from a subinterface.
1775 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
1777 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1779 // Find the config for this subinterface
1781 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1786 // unimplemented or not ethernet
1790 // Double check that the config we found is for our interface (or the interface is down)
1791 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
// L2 mode: mark and widen tag matching.
1795 subint->flags |= SUBINT_CONFIG_L2;
1798 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1799 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
// L3 mode: clear L2 flag and drop the extra tag-match flags.
1803 subint->flags &= ~SUBINT_CONFIG_L2;
1806 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1807 | SUBINT_CONFIG_MATCH_3_TAG);
1815 * Set the L2/L3 mode for the subinterface regardless of port
// Same as ethernet_sw_interface_set_l2_mode but without the port-level
// tag-match flag adjustments.  NOTE(review): branch/brace lines are
// elided in this extract; surviving text kept verbatim.
1818 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1819 u32 sw_if_index, u32 l2)
1821 subint_config_t *subint;
1825 /* Find the config for this subinterface */
1827 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1832 /* unimplemented or not ethernet */
1837 * Double check that the config we found is for our interface (or the
1838 * interface is down)
1840 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
// Toggle only the L2 flag here; match flags are untouched.
1844 subint->flags |= SUBINT_CONFIG_L2;
1848 subint->flags &= ~SUBINT_CONFIG_L2;
// Add/del callback for ethernet subinterfaces: validates the vlan
// config, rejects duplicates, and initializes the subint config (L3 by
// default, admin-down).  NOTE(review): the is_create/delete branches and
// braces are elided in this extract; surviving text kept verbatim.
1856 static clib_error_t *
1857 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1858 u32 sw_if_index, u32 is_create)
1860 clib_error_t *error = 0;
1861 subint_config_t *subint;
1863 u32 unsupported = 0;
1865 // Find the config for this subinterface
1867 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1872 // not implemented yet or not ethernet
1875 // this is the NYI case
1876 error = clib_error_return (0, "not implemented yet");
1887 // Initialize the subint
1888 if (subint->flags & SUBINT_CONFIG_VALID)
1890 // Error vlan already in use
1891 error = clib_error_return (0, "vlan is already in use");
1895 // Note that config is L3 by default
1896 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1897 subint->sw_if_index = ~0; // because interfaces are initially down
// Register the callback with the interface infra.
1904 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
// Error counter strings, generated from error.def via the X-macro
// pattern: each ethernet_error(name, code, string) entry contributes
// its string.
1906 static char *ethernet_error_strings[] = {
1907 #define ethernet_error(n,c,s) s,
1908 #include "error.def"
1909 #undef ethernet_error
// Graph node registration for "ethernet-input": per-frame scalar args
// carry an ethernet_input_frame_t; next nodes come from the
// foreach_ethernet_input_next X-macro.
1913 VLIB_REGISTER_NODE (ethernet_input_node) = {
1914 .name = "ethernet-input",
1915 /* Takes a vector of packets. */
1916 .vector_size = sizeof (u32),
1917 .scalar_size = sizeof (ethernet_input_frame_t),
1918 .n_errors = ETHERNET_N_ERROR,
1919 .error_strings = ethernet_error_strings,
1920 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1922 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1923 foreach_ethernet_input_next
1926 .format_buffer = format_ethernet_header_with_length,
1927 .format_trace = format_ethernet_input_trace,
1928 .unformat_buffer = unformat_ethernet_header,
// Graph node registration for "ethernet-input-type"; shares the same
// next-node arc layout as "ethernet-input".
1931 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1932 .name = "ethernet-input-type",
1933 /* Takes a vector of packets. */
1934 .vector_size = sizeof (u32),
1935 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1937 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1938 foreach_ethernet_input_next
// Graph node registration for "ethernet-input-not-l2"; shares the same
// next-node arc layout as "ethernet-input".
1943 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1944 .name = "ethernet-input-not-l2",
1945 /* Takes a vector of packets. */
1946 .vector_size = sizeof (u32),
1947 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1949 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1950 foreach_ethernet_input_next
1956 #ifndef CLIB_MARCH_VARIANT
1958 ethernet_set_rx_redirect (vnet_main_t * vnm,
1959 vnet_hw_interface_t * hi, u32 enable)
1961 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1962 // don't go directly to ip4-input)
1963 vnet_hw_interface_rx_redirect_to_node
1964 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1969 * Initialization and registration for the next_by_ethernet structure
// Creates the sparse ethertype -> next-index vector (indexed by the
// 16-bit ethertype) and pre-populates the inverse mapping for the DROP
// and PUNT arcs.  NOTE(review): the debug-guard braces around the final
// ASSERT are elided in this extract; surviving text kept verbatim, and
// `&ethernet_main` appears entity-mangled as "ðernet_main".
1973 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1975 l3_next->input_next_by_type = sparse_vec_new
1976 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1977 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1979 vec_validate (l3_next->sparse_index_by_input_next_index,
1980 ETHERNET_INPUT_NEXT_DROP);
1981 vec_validate (l3_next->sparse_index_by_input_next_index,
1982 ETHERNET_INPUT_NEXT_PUNT);
1983 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1984 SPARSE_VEC_INVALID_INDEX;
1985 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1986 SPARSE_VEC_INVALID_INDEX;
1989 * Make sure we don't wipe out an ethernet registration by mistake
1990 * Can happen if init function ordering constraints are missing.
1994 ethernet_main_t *em = ðernet_main;
1995 ASSERT (em->next_by_ethertype_register_called == 0);
2001 // Add an ethertype -> next index mapping to the structure
// Records next_index for the given ethertype in the sparse vector,
// rebuilds the inverse mapping, and refreshes the IP4/IP6/MPLS fast
// caches unless L3 redirect already owns them.  NOTE(review): some
// lines (declarations, `n[0] = next_index`, braces) are elided in this
// extract; surviving text kept verbatim, and `&ethernet_main` appears
// entity-mangled as "ðernet_main".
2003 next_by_ethertype_register (next_by_ethertype_t * l3_next,
2004 u32 ethertype, u32 next_index)
2008 ethernet_main_t *em = ðernet_main;
// Flag that a registration happened, so next_by_ethertype_init can
// assert it is not re-run afterwards.
2012 ethernet_main_t *em = ðernet_main;
2013 em->next_by_ethertype_register_called = 1;
2016 /* Setup ethernet type -> next index sparse vector mapping. */
2017 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2020 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2022 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
2023 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2025 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2027 // do not allow the cached next index's to be updated if L3
2028 // redirect is enabled, as it will have overwritten them
2029 if (!em->redirect_l3)
2031 // Cache common ethertypes directly
2032 if (ethertype == ETHERNET_TYPE_IP4)
2034 l3_next->input_next_ip4 = next_index;
2036 else if (ethertype == ETHERNET_TYPE_IP6)
2038 l3_next->input_next_ip6 = next_index;
2040 else if (ethertype == ETHERNET_TYPE_MPLS)
2042 l3_next->input_next_mpls = next_index;
// One-time init: wires up the three ethernet input nodes, initializes
// the L3 next-by-ethertype structure, and pre-allocates the vlan/qinq
// table pools, reserving pool index 0 of each as the "invalid" table.
2049 ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2051 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2052 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2054 ethernet_setup_node (vm, ethernet_input_node.index);
2055 ethernet_setup_node (vm, ethernet_input_type_node.index);
2056 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2058 next_by_ethertype_init (&em->l3_next);
2060 // Initialize pools and vector for vlan parsing
2061 vec_validate (em->main_intfs, 10); // 10 main interfaces
2062 pool_alloc (em->vlan_pool, 10);
2063 pool_alloc (em->qinq_pool, 1);
2065 // The first vlan pool will always be reserved for an invalid table
2066 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2067 // The first qinq pool will always be reserved for an invalid table
2068 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
// Register a graph node to receive packets of the given ethertype:
// adds the arc from each ethernet input node (the ASSERTs check the
// next-index stays aligned across all three) and records the mapping
// in the next-by-ethertype structure.  NOTE(review): some lines are
// elided in this extract; surviving text kept verbatim, and
// `&ethernet_main` appears entity-mangled as "ðernet_main".
2072 ethernet_register_input_type (vlib_main_t * vm,
2073 ethernet_type_t type, u32 node_index)
2075 ethernet_main_t *em = ðernet_main;
2076 ethernet_type_info_t *ti;
// Make sure ethernet_init has run before touching type_info.
2080 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2082 clib_error_report (error);
2085 ti = ethernet_get_type_info (em, type);
2088 clib_warning ("type_info NULL for type %d", type);
2091 ti->node_index = node_index;
2092 ti->next_index = vlib_node_add_next (vm,
2093 ethernet_input_node.index, node_index);
2094 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2095 ASSERT (i == ti->next_index);
2097 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2098 ASSERT (i == ti->next_index);
2100 // Add the L3 node for this ethertype to the next nodes structure
2101 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2103 // Call the registration functions for other nodes that want a mapping
2104 l2bvi_register_input_type (vm, type, node_index);
// Register the node that receives all L2-mode packets, adding the arc
// on all three ethernet input nodes so the cached em->l2_next index is
// valid for each.  NOTE(review): the line assigning em->l2_next appears
// elided in this extract; surviving text kept verbatim, and
// `&ethernet_main` appears entity-mangled as "ðernet_main".
2108 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
2110 ethernet_main_t *em = ðernet_main;
2114 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2117 * Even if we never use these arcs, we have to align the next indices...
2119 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2121 ASSERT (i == em->l2_next);
2123 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2124 ASSERT (i == em->l2_next);
2127 // Register a next node for L3 redirect, and enable L3 redirect
// Once enabled, all IP4/IP6/MPLS traffic is diverted to the redirect
// node by overwriting the cached next indices; later ethertype
// registrations will no longer refresh those caches (see
// next_by_ethertype_register).  NOTE(review): some lines are elided in
// this extract; surviving text kept verbatim, and `&ethernet_main`
// appears entity-mangled as "ðernet_main".
2129 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
2131 ethernet_main_t *em = ðernet_main;
2134 em->redirect_l3 = 1;
2135 em->redirect_l3_next = vlib_node_add_next (vm,
2136 ethernet_input_node.index,
2139 * Change the cached next nodes to the redirect node
2141 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2142 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2143 em->l3_next.input_next_mpls = em->redirect_l3_next;
2146 * Even if we never use these arcs, we have to align the next indices...
2148 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2150 ASSERT (i == em->redirect_l3_next);
2152 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2154 ASSERT (i == em->redirect_l3_next);
2159 * fd.io coding-style-patch-verification: ON
2162 * eval: (c-set-style "gnu")