2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ethernet_node.c: ethernet packet processing
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/pg/pg.h>
42 #include <vnet/ethernet/ethernet.h>
43 #include <vnet/ethernet/p2p_ethernet.h>
44 #include <vnet/devices/pipe/pipe.h>
45 #include <vppinfra/sparse_vec.h>
46 #include <vnet/l2/l2_bvi.h>
/* Next-node dispositions reachable from ethernet-input.  The foreach macro
   is expanded just below into the ETHERNET_INPUT_NEXT_* enumerators.
   NOTE(review): the "typedef enum {" opener is not visible in this excerpt. */
48 #define foreach_ethernet_input_next \
49 _ (PUNT, "error-punt") \
50 _ (DROP, "error-drop") \
51 _ (LLC, "llc-input") \
52 _ (IP4_INPUT, "ip4-input") \
53 _ (IP4_INPUT_NCS, "ip4-input-no-checksum")
57 #define _(s,n) ETHERNET_INPUT_NEXT_##s,
58 foreach_ethernet_input_next
60 ETHERNET_INPUT_N_NEXT,
61 } ethernet_input_next_t;
/* Tail of the per-packet trace record (the struct opener and the
   packet_data / frame_flags members are not visible in this excerpt).
   frame_data holds the per-frame scalar args (hw/sw if-index) printed by
   format_ethernet_input_trace below. */
67 ethernet_input_frame_t frame_data;
68 } ethernet_input_trace_t;
/* Trace formatter for ethernet-input: prints the frame flags, the single
   hw/sw if-index pair when the frame is marked single-sw-if-index, and the
   captured ethernet header.
   NOTE(review): the "static u8 *" return-type line, braces and the final
   "return s;" are missing from this excerpt. */
71 format_ethernet_input_trace (u8 * s, va_list * va)
73 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
74 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
75 ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
76 u32 indent = format_get_indent (s);
80 s = format (s, "frame: flags 0x%x", t->frame_flags);
81 if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
82 s = format (s, ", hw-if-index %u, sw-if-index %u",
83 t->frame_data.hw_if_index, t->frame_data.sw_if_index);
84 s = format (s, "\n%U", format_white_space, indent);
86 s = format (s, "%U", format_ethernet_header, t->packet_data);
91 extern vlib_node_registration_t ethernet_input_node;
/* Input variants: plain ethernet frames, type-only (arrived via LLC/SNAP
   processing, buffer starts at the ethertype), and not-L2 (forces L3
   handling).  NOTE(review): the "typedef enum {" opener is not visible in
   this excerpt. */
95 ETHERNET_INPUT_VARIANT_ETHERNET,
96 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,
97 ETHERNET_INPUT_VARIANT_NOT_L2,
98 } ethernet_input_variant_t;
101 // Parse the ethernet header to extract vlan tags and innermost ethertype
/* Records l2_hdr_offset, advances the buffer past the ethernet header and
   up to three VLAN tags, and returns via out-params: the innermost
   ethertype (host order), outer/inner VLAN ids, and the subinterface
   match flags (SUBINT_CONFIG_MATCH_{0..3}_TAG).  Also stores the parsed
   vlan count in the buffer metadata.
   NOTE(review): several brace/declaration lines (e.g. the u16 tag and
   vlan_count locals, and the conditional guarding line 161) are missing
   from this excerpt — confirm against the full source before editing. */
102 static_always_inline void
103 parse_header (ethernet_input_variant_t variant,
107 u16 * outer_id, u16 * inner_id, u32 * match_flags)
111 if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
112 || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
114 ethernet_header_t *e0;
116 e0 = (void *) (b0->data + b0->current_data);
118 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
119 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
121 vlib_buffer_advance (b0, sizeof (e0[0]));
123 *type = clib_net_to_host_u16 (e0->type);
125 else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
127 // here when prior node was LLC/SNAP processing
130 e0 = (void *) (b0->data + b0->current_data);
132 vlib_buffer_advance (b0, sizeof (e0[0]));
134 *type = clib_net_to_host_u16 (e0[0]);
137 // save for distinguishing between dot1q and dot1ad later
140 // default the tags to 0 (used if there is no corresponding tag)
144 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
147 // check for vlan encaps
148 if (ethernet_frame_is_tagged (*type))
150 ethernet_vlan_header_t *h0;
153 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
155 h0 = (void *) (b0->data + b0->current_data);
157 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
159 *outer_id = tag & 0xfff;
/* NOTE(review): line 161 clears the 1-tag match flag; its guarding
   condition (presumably outer_id == 0) is not visible here — verify. */
161 *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;
163 *type = clib_net_to_host_u16 (h0->type);
165 vlib_buffer_advance (b0, sizeof (h0[0]));
168 if (*type == ETHERNET_TYPE_VLAN)
170 // Double tagged packet
171 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
173 h0 = (void *) (b0->data + b0->current_data);
175 tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
177 *inner_id = tag & 0xfff;
179 *type = clib_net_to_host_u16 (h0->type);
181 vlib_buffer_advance (b0, sizeof (h0[0]));
183 if (*type == ETHERNET_TYPE_VLAN)
185 // More than double tagged packet
186 *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
188 vlib_buffer_advance (b0, sizeof (h0[0]));
189 vlan_count = 3; // "unknown" number, aka, 3-or-more
193 ethernet_buffer_set_vlan_count (b0, vlan_count);
196 // Determine the subinterface for this packet, given the result of the
197 // vlan table lookups and vlan header parsing. Check the most specific
199 static_always_inline void
/* Resolves the RX subinterface via eth_identify_subint(), then for L3
   interfaces applies the my-mac filter on unicast dmacs, and finally maps
   an unresolved sw_if_index (~0) to ETHERNET_ERROR_DOWN.
   NOTE(review): some parameter lines (b0, match_flags) and the enclosing
   braces/conditions are missing from this excerpt. */
200 identify_subint (vnet_hw_interface_t * hi,
203 main_intf_t * main_intf,
204 vlan_intf_t * vlan_intf,
205 qinq_intf_t * qinq_intf,
206 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
210 matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
211 qinq_intf, new_sw_if_index, error0, is_l2);
216 // Perform L3 my-mac filter
217 // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
218 // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
221 ethernet_header_t *e0;
222 e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
224 if (!(ethernet_address_cast (e0->dst_address)))
226 if (!ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
228 *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
233 // Check for down subinterface
234 *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
/* Chooses the next node for one packet: DROP on any pending error; the
   cached L2 next (rewinding the buffer so the L2 header is preserved) for
   L2 paths; direct IP4/IP6/MPLS nexts for common ethertypes; the L3
   redirect next when enabled; otherwise a sparse-vec lookup by ethertype,
   with LLC frames (type < 0x600) diverted to llc-input unless we already
   came from there.
   NOTE(review): brace lines and the is_l20 parameter/condition around
   line 254 are missing from this excerpt. */
238 static_always_inline void
239 determine_next_node (ethernet_main_t * em,
240 ethernet_input_variant_t variant,
242 u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
244 vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
245 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
247 if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
249 // some error occurred
250 *next0 = ETHERNET_INPUT_NEXT_DROP;
254 // record the L2 len and reset the buffer so the L2 header is preserved
255 u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
256 vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
257 *next0 = em->l2_next;
258 ASSERT (vnet_buffer (b0)->l2.l2_len ==
259 ethernet_buffer_header_size (b0));
260 vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
262 // check for common IP/MPLS ethertypes
264 else if (type0 == ETHERNET_TYPE_IP4)
266 *next0 = em->l3_next.input_next_ip4;
268 else if (type0 == ETHERNET_TYPE_IP6)
270 *next0 = em->l3_next.input_next_ip6;
272 else if (type0 == ETHERNET_TYPE_MPLS)
274 *next0 = em->l3_next.input_next_mpls;
277 else if (em->redirect_l3)
279 // L3 Redirect is on, the cached common next nodes will be
280 // pointing to the redirect node, catch the uncommon types here
281 *next0 = em->redirect_l3_next;
285 // uncommon ethertype, check table
287 i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
288 *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
291 SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
293 // The table is not populated with LLC values, so check that now.
294 // If variant is variant_ethernet then we came from LLC processing. Don't
295 // go back there; drop instead using by keeping the drop/bad table result.
296 if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
298 *next0 = ETHERNET_INPUT_NEXT_LLC;
304 /* following vector code relies on following assumptions */
/* eth_input_adv_and_flags_x4() loads current_data/current_length/flags as
   one 64-bit unit per buffer, and stores l2_hdr_offset + l3_hdr_offset with
   a single 32-bit store — these layout asserts keep that code valid. */
305 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
306 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
307 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
308 STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
309 STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
310 "l3_hdr_offset must follow l2_hdr_offset");
/* Sets l2/l3 header offsets and valid flags on 4 buffers at once; in L3
   mode also advances past the ethernet header, in L2 mode records l2_len.
   Has an AVX2 (CLIB_HAVE_VEC256) path that updates the metadata of all
   four buffers with gathered/scattered 64-bit loads/stores, and a scalar
   fallback.  NOTE(review): several #else/#endif and brace lines are
   missing from this excerpt. */
312 static_always_inline void
313 eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
315 i16 adv = sizeof (ethernet_header_t);
316 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
317 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
319 #ifdef CLIB_HAVE_VEC256
320 /* to reduce number of small loads/stores we are loading first 64 bits
321 of each buffer metadata into 256-bit register so we can advance
322 current_data, current_length and flags.
323 Observed saving of this code is ~2 clocks per packet */
326 /* vector if signed 16 bit integers used in signed vector add operation
327 to advnce current_data and current_length */
328 u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
330 adv, -adv, 0, 0, adv, -adv, 0, 0,
331 adv, -adv, 0, 0, adv, -adv, 0, 0
334 /* load 4 x 64 bits */
335 r = u64x4_gather (b[0], b[1], b[2], b[3]);
341 radv = (u64x4) ((i16x16) r + adv4);
343 /* write 4 x 64 bits */
344 u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);
346 /* use old current_data as l2_hdr_offset and new current_data as
348 r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);
350 /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
351 u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
352 u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
353 u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
354 u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);
/* sanity checks: offsets written above must match the scalar definition */
358 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
359 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
360 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
361 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);
363 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
364 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
365 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
366 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
370 ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
371 ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
372 ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
373 ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);
375 ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
376 ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
377 ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
378 ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
/* scalar fallback path (no VEC256): write offsets/flags individually */
382 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
383 vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
384 vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
385 vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
386 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
387 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
388 vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
389 vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;
393 vlib_buffer_advance (b[0], adv);
394 vlib_buffer_advance (b[1], adv);
395 vlib_buffer_advance (b[2], adv);
396 vlib_buffer_advance (b[3], adv);
399 b[0]->flags |= flags;
400 b[1]->flags |= flags;
401 b[2]->flags |= flags;
402 b[3]->flags |= flags;
407 vnet_buffer (b[0])->l2.l2_len = adv;
408 vnet_buffer (b[1])->l2.l2_len = adv;
409 vnet_buffer (b[2])->l2.l2_len = adv;
410 vnet_buffer (b[3])->l2.l2_len = adv;
/* Single-buffer variant of eth_input_adv_and_flags_x4: set l2/l3 header
   offsets and valid flags; advance past the ethernet header in L3 mode,
   record l2_len otherwise.  NOTE(review): the if (is_l3) guards around
   lines 425/428 are not visible in this excerpt. */
414 static_always_inline void
415 eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
417 i16 adv = sizeof (ethernet_header_t);
418 u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
419 VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
421 vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
422 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
425 vlib_buffer_advance (b[0], adv);
426 b[0]->flags |= flags;
428 vnet_buffer (b[0])->l2.l2_len = adv;
/* Extracts per-packet data for buffer b[offset] into parallel arrays:
   the ethertype (network order), the 8 bytes following the ethernet
   header (potential VLAN tags), and optionally the destination MAC as a
   u64 for later my-mac filtering.  A VEC128 path fetches dmac+etype with
   one unaligned 128-bit load.  NOTE(review): #else/#endif and the
   if (dmac_check) guard lines are missing from this excerpt. */
432 static_always_inline void
433 eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
434 u64 * dmacs, int offset, int dmac_check)
436 ethernet_header_t *e;
437 e = vlib_buffer_get_current (b[offset]);
438 #ifdef CLIB_HAVE_VEC128
439 u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
440 etype[offset] = ((u16x8) r)[3];
443 etype[offset] = e->type;
444 tags[offset] = *(u64 *) (e + 1);
448 dmacs[offset] = *(u64 *) e;
/* Slow-path next-node lookup by ethertype: LLC frames (type < 0x600) go
   to llc-input, everything else through the sparse per-type table. */
451 static_always_inline u16
452 eth_input_next_by_type (u16 etype)
454 ethernet_main_t *em = &ethernet_main;
456 return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
457 vec_elt (em->l3_next.input_next_by_type,
458 sparse_vec_index (em->l3_next.input_next_by_type, etype));
/* Tail of the per-direction tag-lookup cache struct (opening members —
   tag/mask/sw_if_index/type/len/adv/next/err/n_tags — are not visible in
   this excerpt); n_packets/n_bytes accumulate RX counters per cached
   subinterface. */
468 u64 n_packets, n_bytes;
469 } eth_input_tag_lookup_t;
/* Flushes the accumulated RX packet/byte counts from a tag-lookup cache
   entry into the per-sw-interface combined counters; no-op when nothing
   was accumulated or the cached sw_if_index is unset. */
471 static_always_inline void
472 eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
473 eth_input_tag_lookup_t * l)
475 if (l->n_packets == 0 || l->sw_if_index == ~0)
/* n_bytes accumulates payload deltas; add the fixed per-packet header
   length (l->len) for all packets at once */
479 l->n_bytes += l->n_packets * l->len;
481 vlib_increment_combined_counter
482 (vnm->interface_main.combined_sw_if_counters +
483 VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
484 l->n_packets, l->n_bytes);
/* Slow path for one tagged packet: resolves the VLAN/QinQ subinterface,
   caching the result (tag/mask/next/adv/err) in *l so consecutive packets
   with the same tag skip the table lookups; applies the L3 my-mac filter,
   fixes up buffer metadata (advance, l2_len, l3_hdr_offset, RX
   sw_if_index) and accumulates interface counters.
   NOTE(review): many brace/else lines and some lookup arguments are
   missing from this excerpt — statement grouping below is partly
   reconstructed by the reader, verify against the full source. */
488 eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
489 vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
490 u64 tag, u16 * next, vlib_buffer_t * b,
491 eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
492 int main_is_l3, int check_dmac)
494 ethernet_main_t *em = &ethernet_main;
/* cache miss: the masked tag differs from the cached one */
496 if ((tag ^ l->tag) & l->mask)
498 main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
501 vlan_table_t *vlan_table;
502 qinq_table_t *qinq_table;
503 u16 *t = (u16 *) & tag;
504 u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
505 u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
506 u32 matched, is_l2, new_sw_if_index;
508 vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
509 mif->dot1ad_vlans : mif->dot1q_vlans);
510 vif = &vlan_table->vlans[vlan1];
511 qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
512 qif = &qinq_table->vlans[vlan2];
513 l->err = ETHERNET_ERROR_NONE;
514 l->type = clib_net_to_host_u16 (t[1]);
516 if (l->type == ETHERNET_TYPE_VLAN)
518 l->type = clib_net_to_host_u16 (t[3]);
520 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
521 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
522 qif, &new_sw_if_index, &l->err,
530 new_sw_if_index = hi->sw_if_index;
531 l->err = ETHERNET_ERROR_NONE;
533 is_l2 = main_is_l3 == 0;
536 matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
537 SUBINT_CONFIG_MATCH_1_TAG, mif,
538 vif, qif, &new_sw_if_index,
/* subinterface changed: flush counters accumulated for the old one */
542 if (l->sw_if_index != new_sw_if_index)
544 eth_input_update_if_counters (vm, vnm, l);
547 l->sw_if_index = new_sw_if_index;
/* single-tag match only compares the first 4 tag bytes */
550 l->mask = (l->n_tags == 2) ?
551 clib_net_to_host_u64 (0xffffffffffffffff) :
552 clib_net_to_host_u64 (0xffffffff00000000);
554 if (matched && l->sw_if_index == ~0)
555 l->err = ETHERNET_ERROR_DOWN;
557 l->len = sizeof (ethernet_header_t) +
558 l->n_tags * sizeof (ethernet_vlan_header_t);
560 l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
561 l->n_tags * sizeof (ethernet_vlan_header_t);
563 l->adv = is_l2 ? 0 : l->len;
565 if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
566 l->next = ETHERNET_INPUT_NEXT_DROP;
568 l->next = em->l2_next;
569 else if (l->type == ETHERNET_TYPE_IP4)
570 l->next = em->l3_next.input_next_ip4;
571 else if (l->type == ETHERNET_TYPE_IP6)
572 l->next = em->l3_next.input_next_ip6;
573 else if (l->type == ETHERNET_TYPE_MPLS)
574 l->next = em->l3_next.input_next_mpls;
575 else if (em->redirect_l3)
576 l->next = em->redirect_l3_next;
579 l->next = eth_input_next_by_type (l->type);
580 if (l->next == ETHERNET_INPUT_NEXT_PUNT)
581 l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
/* l->adv > 0 means the packet is being handed to an L3 next node,
   so a bad unicast dmac must be rejected here */
585 if (check_dmac && l->adv > 0 && dmac_bad)
587 l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
588 next[0] = ETHERNET_INPUT_NEXT_PUNT;
593 vlib_buffer_advance (b, l->adv);
594 vnet_buffer (b)->l2.l2_len = l->len;
595 vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;
597 if (l->err == ETHERNET_ERROR_NONE)
599 vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
600 ethernet_buffer_set_vlan_count (b, l->n_tags);
603 b->error = node->errors[l->err];
605 /* update counters */
607 l->n_bytes += vlib_buffer_length_in_chain (vm, b);
610 /* process frame of buffers, store ethertype into array and update
611 buffer metadata fields depending on interface being l2 or l3 assuming that
612 packets are untagged. For tagged packets those fields are updated later.
613 Optionally store Destionation MAC address and tag data into arrays
614 for further processing */
/* Four phases over the whole frame: (1) extract etype/tag/dmac per packet
   and set untagged metadata (4-wide with prefetch, then remainders);
   (2) optional vectorized dmac my-mac check filling dmacs_bad[];
   (3) fastpath next-node selection by ethertype (AVX2 16-wide or scalar),
   collecting non-trivial packets into slowpath_indices; (4) slowpath loop
   handling dot1q/dot1ad via eth_input_tag_lookup and unknown ethertypes.
   NOTE(review): loop headers, braces, #else/#endif branches and several
   statements are absent from this excerpt. */
618 static_always_inline void
619 eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
620 vnet_hw_interface_t * hi,
621 u32 * buffer_indices, u32 n_packets, int main_is_l3,
622 int ip4_cksum_ok, int dmac_check)
624 ethernet_main_t *em = &ethernet_main;
625 u16 nexts[VLIB_FRAME_SIZE], *next;
626 u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
627 u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
628 u8 dmacs_bad[VLIB_FRAME_SIZE];
629 u64 tags[VLIB_FRAME_SIZE], *tag = tags;
630 u16 slowpath_indices[VLIB_FRAME_SIZE];
632 u16 next_ip4, next_ip6, next_mpls, next_l2;
633 u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
634 u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
635 u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
636 u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
637 u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
638 i32 n_left = n_packets;
639 vlib_buffer_t *b[20];
642 from = buffer_indices;
/* main extraction loop, 4 packets at a time with two-stage prefetch
   (ph = headers 16 ahead, pd = data 8 ahead) */
646 vlib_buffer_t **ph = b + 16, **pd = b + 8;
647 vlib_get_buffers (vm, from, b, 4);
648 vlib_get_buffers (vm, from + 8, pd, 4);
649 vlib_get_buffers (vm, from + 16, ph, 4);
651 vlib_prefetch_buffer_header (ph[0], LOAD);
652 vlib_prefetch_buffer_data (pd[0], LOAD);
653 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
655 vlib_prefetch_buffer_header (ph[1], LOAD);
656 vlib_prefetch_buffer_data (pd[1], LOAD);
657 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
659 vlib_prefetch_buffer_header (ph[2], LOAD);
660 vlib_prefetch_buffer_data (pd[2], LOAD);
661 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
663 vlib_prefetch_buffer_header (ph[3], LOAD);
664 vlib_prefetch_buffer_data (pd[3], LOAD);
665 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
667 eth_input_adv_and_flags_x4 (b, main_is_l3);
/* remainder: quads without prefetch, then singles */
678 vlib_get_buffers (vm, from, b, 4);
679 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
680 eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
681 eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
682 eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
683 eth_input_adv_and_flags_x4 (b, main_is_l3);
694 vlib_get_buffers (vm, from, b, 1);
695 eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
696 eth_input_adv_and_flags_x1 (b, main_is_l3);
/* dmac my-mac check: flag unicast dmacs that differ from hw_address;
   mask strips the 2 trailing bytes beyond the 6-byte MAC, igbit is the
   multicast/broadcast (I/G) bit */
708 u64 mask = clib_net_to_host_u64 (0xFFFFFFFFFFFF0000);
709 u64 igbit = clib_net_to_host_u64 (0x0100000000000000);
710 u64 hwaddr = (*(u64 *) hi->hw_address) & mask;
712 u8 *dmac_bad = dmacs_bad;
716 #ifdef CLIB_HAVE_VEC256
717 u64x4 igbit4 = u64x4_splat (igbit);
718 u64x4 mask4 = u64x4_splat (mask);
719 u64x4 hwaddr4 = u64x4_splat (hwaddr);
723 r0 = u64x4_load_unaligned (dmac + 0) & mask4;
724 r1 = u64x4_load_unaligned (dmac + 4) & mask4;
726 r0 = (r0 != hwaddr4) & ((r0 & igbit4) == 0);
727 r1 = (r1 != hwaddr4) & ((r1 & igbit4) == 0);
729 *(u32 *) (dmac_bad + 0) = u8x32_msb_mask ((u8x32) (r0));
730 *(u32 *) (dmac_bad + 4) = u8x32_msb_mask ((u8x32) (r1));
/* scalar fallback of the same predicate */
746 r0 = (r0 != hwaddr) && ((r0 & igbit) == 0);
747 r1 = (r1 != hwaddr) && ((r1 & igbit) == 0);
748 r2 = (r2 != hwaddr) && ((r2 & igbit) == 0);
749 r3 = (r3 != hwaddr) && ((r3 & igbit) == 0);
764 next_ip4 = em->l3_next.input_next_ip4;
765 next_ip6 = em->l3_next.input_next_ip6;
766 next_mpls = em->l3_next.input_next_mpls;
767 next_l2 = em->l2_next;
769 if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
770 next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
772 #ifdef CLIB_HAVE_VEC256
773 u16x16 et16_ip4 = u16x16_splat (et_ip4);
774 u16x16 et16_ip6 = u16x16_splat (et_ip6);
775 u16x16 et16_mpls = u16x16_splat (et_mpls);
776 u16x16 et16_vlan = u16x16_splat (et_vlan);
777 u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
778 u16x16 next16_ip4 = u16x16_splat (next_ip4);
779 u16x16 next16_ip6 = u16x16_splat (next_ip6);
780 u16x16 next16_mpls = u16x16_splat (next_mpls);
781 u16x16 next16_l2 = u16x16_splat (next_l2);
783 u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
792 /* fastpath - in l3 mode hadles ip4, ip6 and mpls packets, other packets
793 are considered as slowpath, in l2 mode all untagged packets are
794 considered as fastpath */
797 #ifdef CLIB_HAVE_VEC256
801 u16x16 e16 = u16x16_load_unaligned (etype);
/* l3 mode: match the three common ethertypes */
804 r += (e16 == et16_ip4) & next16_ip4;
805 r += (e16 == et16_ip6) & next16_ip6;
806 r += (e16 == et16_mpls) & next16_mpls;
/* l2 mode: everything not vlan/dot1ad is fastpath */
809 r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
810 u16x16_store_unaligned (r, next);
/* any zero lane means at least one slowpath packet in this group */
812 if (!u16x16_is_all_zero (r == zero))
814 if (u16x16_is_all_zero (r))
816 u16x16_store_unaligned (u16x16_splat (i) + stairs,
817 slowpath_indices + n_slowpath);
822 for (int j = 0; j < 16; j++)
824 slowpath_indices[n_slowpath++] = i + j;
/* scalar fastpath classification */
835 if (main_is_l3 && etype[0] == et_ip4)
837 else if (main_is_l3 && etype[0] == et_ip6)
839 else if (main_is_l3 && etype[0] == et_mpls)
841 else if (main_is_l3 == 0 &&
842 etype[0] != et_vlan && etype[0] != et_dot1ad)
847 slowpath_indices[n_slowpath++] = i;
/* slowpath: tagged packets via cached tag lookup, unknown ethertypes
   via the sparse table (memoizing the last unknown type seen) */
858 vnet_main_t *vnm = vnet_get_main ();
860 u16 *si = slowpath_indices;
861 u32 last_unknown_etype = ~0;
862 u32 last_unknown_next = ~0;
863 eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
865 .tag = tags[si[0]] ^ -1LL,
869 clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
874 u16 etype = etypes[i];
876 if (etype == et_vlan)
878 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
879 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
880 &dot1q_lookup, dmacs_bad[i], 0,
881 main_is_l3, dmac_check);
884 else if (etype == et_dot1ad)
886 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
887 eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
888 &dot1ad_lookup, dmacs_bad[i], 1,
889 main_is_l3, dmac_check);
893 /* untagged packet with not well known etyertype */
894 if (last_unknown_etype != etype)
896 last_unknown_etype = etype;
897 etype = clib_host_to_net_u16 (etype);
898 last_unknown_next = eth_input_next_by_type (etype);
900 if (dmac_check && main_is_l3 && dmacs_bad[i])
902 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
903 b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
904 nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
907 nexts[i] = last_unknown_next;
/* flush counters accumulated by the two tag-lookup caches */
915 eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
916 eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
919 vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
/* Fastpath entry for a frame known to come from a single interface:
   decides L2 vs L3 from the untagged subinterface config, and whether the
   dmac check is needed (L3 with no subinterfaces and not promiscuous ->
   no check), then dispatches to eth_input_process_frame.
   NOTE(review): the conditional structure between the three
   eth_input_process_frame calls is only partially visible here. */
922 static_always_inline void
923 eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
924 vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
927 ethernet_main_t *em = &ethernet_main;
928 ethernet_interface_t *ei;
929 ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
930 main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
931 subint_config_t *subint0 = &intf0->untagged_subint;
933 int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
934 int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
938 /* main interface is L3, we dont expect tagged packets and interface
939 is not in promisc node, so we dont't need to check DMAC */
943 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
946 /* subinterfaces and promisc mode so DMAC check is needed */
947 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
953 /* untagged packets are treated as L2 */
955 eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
/* Per-frame tracing helper: when node tracing is enabled, copies the
   packet head and frame scalar args into a trace record for each traced
   buffer; additionally feeds buffers to the global RX pcap capture when
   that is enabled (optionally filtered by sw_if_index).
   NOTE(review): loop headers, from/n_left advancement and some pcap-call
   arguments are missing from this excerpt. */
961 static_always_inline void
962 ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
963 vlib_frame_t * from_frame)
966 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
968 from = vlib_frame_vector_args (from_frame);
969 n_left = from_frame->n_vectors;
973 ethernet_input_trace_t *t0;
974 vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);
976 if (b0->flags & VLIB_BUFFER_IS_TRACED)
978 t0 = vlib_add_trace (vm, node, b0,
979 sizeof (ethernet_input_trace_t));
980 clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
981 sizeof (t0->packet_data));
982 t0->frame_flags = from_frame->flags;
983 clib_memcpy_fast (&t0->frame_data,
984 vlib_frame_scalar_args (from_frame),
985 sizeof (ethernet_input_frame_t));
992 /* rx pcap capture if enabled */
993 if (PREDICT_FALSE (vlib_global_main.pcap[VLIB_RX].pcap_enable))
997 from = vlib_frame_vector_args (from_frame);
998 n_left = from_frame->n_vectors;
1004 b0 = vlib_get_buffer (vm, bi0);
/* sw_if_index 0 means "capture all interfaces" */
1006 if (vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index == 0 ||
1007 vlib_global_main.pcap[VLIB_RX].pcap_sw_if_index
1008 == vnet_buffer (b0)->sw_if_index[VLIB_RX])
1010 pcap_add_buffer (&vlib_global_main.pcap[VLIB_RX].pcap_main, vm,
1018 static_always_inline void
1019 ethernet_input_inline (vlib_main_t * vm,
1020 vlib_node_runtime_t * node,
1021 u32 * from, u32 n_packets,
1022 ethernet_input_variant_t variant)
1024 vnet_main_t *vnm = vnet_get_main ();
1025 ethernet_main_t *em = ðernet_main;
1026 vlib_node_runtime_t *error_node;
1027 u32 n_left_from, next_index, *to_next;
1028 u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
1029 u32 thread_index = vm->thread_index;
1030 u32 cached_sw_if_index = ~0;
1031 u32 cached_is_l2 = 0; /* shut up gcc */
1032 vnet_hw_interface_t *hi = NULL; /* used for main interface only */
1033 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
1034 vlib_buffer_t **b = bufs;
1036 if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
1037 error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
1041 n_left_from = n_packets;
1043 next_index = node->cached_next_index;
1044 stats_sw_if_index = node->runtime_data[0];
1045 stats_n_packets = stats_n_bytes = 0;
1046 vlib_get_buffers (vm, from, bufs, n_left_from);
1048 while (n_left_from > 0)
1052 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1054 while (n_left_from >= 4 && n_left_to_next >= 2)
1057 vlib_buffer_t *b0, *b1;
1058 u8 next0, next1, error0, error1;
1059 u16 type0, orig_type0, type1, orig_type1;
1060 u16 outer_id0, inner_id0, outer_id1, inner_id1;
1061 u32 match_flags0, match_flags1;
1062 u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
1063 new_sw_if_index1, len1;
1064 vnet_hw_interface_t *hi0, *hi1;
1065 main_intf_t *main_intf0, *main_intf1;
1066 vlan_intf_t *vlan_intf0, *vlan_intf1;
1067 qinq_intf_t *qinq_intf0, *qinq_intf1;
1069 ethernet_header_t *e0, *e1;
1071 /* Prefetch next iteration. */
1073 vlib_prefetch_buffer_header (b[2], STORE);
1074 vlib_prefetch_buffer_header (b[3], STORE);
1076 CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
1077 CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
1086 n_left_to_next -= 2;
1093 error0 = error1 = ETHERNET_ERROR_NONE;
1094 e0 = vlib_buffer_get_current (b0);
1095 type0 = clib_net_to_host_u16 (e0->type);
1096 e1 = vlib_buffer_get_current (b1);
1097 type1 = clib_net_to_host_u16 (e1->type);
1099 /* Set the L2 header offset for all packets */
1100 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1101 vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
1102 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1103 b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1105 /* Speed-path for the untagged case */
1106 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1107 && !ethernet_frame_is_any_tagged_x2 (type0,
1111 subint_config_t *subint0;
1112 u32 sw_if_index0, sw_if_index1;
1114 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1115 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1116 is_l20 = cached_is_l2;
1118 /* This is probably wholly unnecessary */
1119 if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
1122 /* Now sw_if_index0 == sw_if_index1 */
1123 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1125 cached_sw_if_index = sw_if_index0;
1126 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1127 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1128 subint0 = &intf0->untagged_subint;
1129 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1132 if (PREDICT_TRUE (is_l20 != 0))
1134 vnet_buffer (b0)->l3_hdr_offset =
1135 vnet_buffer (b0)->l2_hdr_offset +
1136 sizeof (ethernet_header_t);
1137 vnet_buffer (b1)->l3_hdr_offset =
1138 vnet_buffer (b1)->l2_hdr_offset +
1139 sizeof (ethernet_header_t);
1140 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1141 b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1142 next0 = em->l2_next;
1143 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1144 next1 = em->l2_next;
1145 vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
1149 if (!ethernet_address_cast (e0->dst_address) &&
1150 (hi->hw_address != 0) &&
1151 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1152 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1153 if (!ethernet_address_cast (e1->dst_address) &&
1154 (hi->hw_address != 0) &&
1155 !ethernet_mac_address_equal ((u8 *) e1, hi->hw_address))
1156 error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1157 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1158 determine_next_node (em, variant, 0, type0, b0,
1160 vlib_buffer_advance (b1, sizeof (ethernet_header_t));
1161 determine_next_node (em, variant, 0, type1, b1,
1167 /* Slow-path for the tagged case */
1169 parse_header (variant,
1172 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1174 parse_header (variant,
1177 &orig_type1, &outer_id1, &inner_id1, &match_flags1);
1179 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1180 old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
1182 eth_vlan_table_lookups (em,
1189 &main_intf0, &vlan_intf0, &qinq_intf0);
1191 eth_vlan_table_lookups (em,
1198 &main_intf1, &vlan_intf1, &qinq_intf1);
1200 identify_subint (hi0,
1205 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1207 identify_subint (hi1,
1212 qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
1214 // Save RX sw_if_index for later nodes
1215 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1217 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1218 vnet_buffer (b1)->sw_if_index[VLIB_RX] =
1220 ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
1222 // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
1223 if (((new_sw_if_index0 != ~0)
1224 && (new_sw_if_index0 != old_sw_if_index0))
1225 || ((new_sw_if_index1 != ~0)
1226 && (new_sw_if_index1 != old_sw_if_index1)))
1229 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1230 - vnet_buffer (b0)->l2_hdr_offset;
1231 len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
1232 - vnet_buffer (b1)->l2_hdr_offset;
1234 stats_n_packets += 2;
1235 stats_n_bytes += len0 + len1;
1238 (!(new_sw_if_index0 == stats_sw_if_index
1239 && new_sw_if_index1 == stats_sw_if_index)))
1241 stats_n_packets -= 2;
1242 stats_n_bytes -= len0 + len1;
1244 if (new_sw_if_index0 != old_sw_if_index0
1245 && new_sw_if_index0 != ~0)
1246 vlib_increment_combined_counter (vnm->
1247 interface_main.combined_sw_if_counters
1249 VNET_INTERFACE_COUNTER_RX,
1251 new_sw_if_index0, 1,
1253 if (new_sw_if_index1 != old_sw_if_index1
1254 && new_sw_if_index1 != ~0)
1255 vlib_increment_combined_counter (vnm->
1256 interface_main.combined_sw_if_counters
1258 VNET_INTERFACE_COUNTER_RX,
1260 new_sw_if_index1, 1,
1263 if (new_sw_if_index0 == new_sw_if_index1)
1265 if (stats_n_packets > 0)
1267 vlib_increment_combined_counter
1268 (vnm->interface_main.combined_sw_if_counters
1269 + VNET_INTERFACE_COUNTER_RX,
1272 stats_n_packets, stats_n_bytes);
1273 stats_n_packets = stats_n_bytes = 0;
1275 stats_sw_if_index = new_sw_if_index0;
1280 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1281 is_l20 = is_l21 = 0;
1283 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1285 determine_next_node (em, variant, is_l21, type1, b1, &error1,
1289 b0->error = error_node->errors[error0];
1290 b1->error = error_node->errors[error1];
1292 // verify speculative enqueue
1293 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1294 n_left_to_next, bi0, bi1, next0,
1298 while (n_left_from > 0 && n_left_to_next > 0)
1303 u16 type0, orig_type0;
1304 u16 outer_id0, inner_id0;
1306 u32 old_sw_if_index0, new_sw_if_index0, len0;
1307 vnet_hw_interface_t *hi0;
1308 main_intf_t *main_intf0;
1309 vlan_intf_t *vlan_intf0;
1310 qinq_intf_t *qinq_intf0;
1311 ethernet_header_t *e0;
1314 // Prefetch next iteration
1315 if (n_left_from > 1)
1317 vlib_prefetch_buffer_header (b[1], STORE);
1318 CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1326 n_left_to_next -= 1;
1331 error0 = ETHERNET_ERROR_NONE;
1332 e0 = vlib_buffer_get_current (b0);
1333 type0 = clib_net_to_host_u16 (e0->type);
1335 /* Set the L2 header offset for all packets */
1336 vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
1337 b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
1339 /* Speed-path for the untagged case */
1340 if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
1341 && !ethernet_frame_is_tagged (type0)))
1344 subint_config_t *subint0;
1347 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1348 is_l20 = cached_is_l2;
1350 if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
1352 cached_sw_if_index = sw_if_index0;
1353 hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1354 intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1355 subint0 = &intf0->untagged_subint;
1356 cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
1360 if (PREDICT_TRUE (is_l20 != 0))
1362 vnet_buffer (b0)->l3_hdr_offset =
1363 vnet_buffer (b0)->l2_hdr_offset +
1364 sizeof (ethernet_header_t);
1365 b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
1366 next0 = em->l2_next;
1367 vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
1371 if (!ethernet_address_cast (e0->dst_address) &&
1372 (hi->hw_address != 0) &&
1373 !ethernet_mac_address_equal ((u8 *) e0, hi->hw_address))
1374 error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
1375 vlib_buffer_advance (b0, sizeof (ethernet_header_t));
1376 determine_next_node (em, variant, 0, type0, b0,
1382 /* Slow-path for the tagged case */
1383 parse_header (variant,
1386 &orig_type0, &outer_id0, &inner_id0, &match_flags0);
1388 old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1390 eth_vlan_table_lookups (em,
1397 &main_intf0, &vlan_intf0, &qinq_intf0);
1399 identify_subint (hi0,
1404 qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
1406 // Save RX sw_if_index for later nodes
1407 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
1409 ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
1411 // Increment subinterface stats
1412 // Note that interface-level counters have already been incremented
1413 // prior to calling this function. Thus only subinterface counters
1414 // are incremented here.
1416 // Interface level counters include packets received on the main
1417 // interface and all subinterfaces. Subinterface level counters
1418 // include only those packets received on that subinterface
1419 // Increment stats if the subint is valid and it is not the main intf
1420 if ((new_sw_if_index0 != ~0)
1421 && (new_sw_if_index0 != old_sw_if_index0))
1424 len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
1425 - vnet_buffer (b0)->l2_hdr_offset;
1427 stats_n_packets += 1;
1428 stats_n_bytes += len0;
1430 // Batch stat increments from the same subinterface so counters
1431 // don't need to be incremented for every packet.
1432 if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
1434 stats_n_packets -= 1;
1435 stats_n_bytes -= len0;
1437 if (new_sw_if_index0 != ~0)
1438 vlib_increment_combined_counter
1439 (vnm->interface_main.combined_sw_if_counters
1440 + VNET_INTERFACE_COUNTER_RX,
1441 thread_index, new_sw_if_index0, 1, len0);
1442 if (stats_n_packets > 0)
1444 vlib_increment_combined_counter
1445 (vnm->interface_main.combined_sw_if_counters
1446 + VNET_INTERFACE_COUNTER_RX,
1448 stats_sw_if_index, stats_n_packets, stats_n_bytes);
1449 stats_n_packets = stats_n_bytes = 0;
1451 stats_sw_if_index = new_sw_if_index0;
1455 if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
1458 determine_next_node (em, variant, is_l20, type0, b0, &error0,
1462 b0->error = error_node->errors[error0];
1464 // verify speculative enqueue
1465 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1466 to_next, n_left_to_next,
1470 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1473 // Increment any remaining batched stats
1474 if (stats_n_packets > 0)
1476 vlib_increment_combined_counter
1477 (vnm->interface_main.combined_sw_if_counters
1478 + VNET_INTERFACE_COUNTER_RX,
1479 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
1480 node->runtime_data[0] = stats_sw_if_index;
/*
 * ethernet-input node function.
 * Fast path: if the frame scalar data carries a single hw_if_index hint
 * (ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX), all packets came from one interface,
 * so dispatch to the optimized per-interface handler; otherwise fall back to
 * the generic inline with the plain ETHERNET variant.
 * NOTE(review): this view elides some lines (braces / return) — orig lines
 * 1487, 1491, 1493, 1495, 1500-1501 are not visible here.
 */
1484 VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1485 vlib_node_runtime_t * node,
1486 vlib_frame_t * frame)
1488 vnet_main_t *vnm = vnet_get_main ();
1489 u32 *from = vlib_frame_vector_args (frame);
1490 u32 n_packets = frame->n_vectors;
// Record traces before any buffer contents are modified.
1492 ethernet_input_trace (vm, node, frame);
1494 if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
// Per-frame scalar args hold the hw_if_index for the whole vector.
1496 ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
// Device already validated the IP4 checksum for every packet in the frame.
1497 int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1498 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1499 eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
// Mixed-interface frame: generic slow(er) path.
1502 ethernet_input_inline (vm, node, from, n_packets,
1503 ETHERNET_INPUT_VARIANT_ETHERNET);
/*
 * ethernet-input-type node function: same generic inline as ethernet-input,
 * but with the ETHERNET_TYPE variant (buffers arrive with the ethertype
 * already parsed by an upstream node).
 * NOTE(review): braces/return lines are elided in this view.
 */
1507 VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1508 vlib_node_runtime_t * node,
1509 vlib_frame_t * from_frame)
1511 u32 *from = vlib_frame_vector_args (from_frame);
1512 u32 n_packets = from_frame->n_vectors;
1513 ethernet_input_trace (vm, node, from_frame);
1514 ethernet_input_inline (vm, node, from, n_packets,
1515 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
/*
 * ethernet-input-not-l2 node function: generic inline with the NOT_L2
 * variant, which forces L3 treatment regardless of subinterface L2 config.
 * NOTE(review): braces/return lines are elided in this view.
 */
1519 VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1520 vlib_node_runtime_t * node,
1521 vlib_frame_t * from_frame)
1523 u32 *from = vlib_frame_vector_args (from_frame);
1524 u32 n_packets = from_frame->n_vectors;
1525 ethernet_input_trace (vm, node, from_frame);
1526 ethernet_input_inline (vm, node, from, n_packets,
1527 ETHERNET_INPUT_VARIANT_NOT_L2);
1532 // Return the subinterface config struct for the given sw_if_index
1533 // Also return via parameter the appropriate match flags for the
1534 // configured number of tags.
1535 // On error (unsupported or not ethernet) return 0.
/*
 * Resolve the subint_config_t for sw_if_index, allocating VLAN/QinQ lookup
 * tables on demand. Returns the subint (0 on error) and, via out-params,
 * the tag-match flags for the config and whether it is unsupported.
 * Dispatch order: P2P -> PIPE -> default-subint -> untagged -> tagged
 * (dot1ad/dot1q, one tag, two tags, QinQ).
 * NOTE(review): many structural lines (braces, else branches, `done:` label,
 * return) are elided from this view — orig numbering gaps show them.
 */
1536 static subint_config_t *
1537 ethernet_sw_interface_get_config (vnet_main_t * vnm,
1539 u32 * flags, u32 * unsupported)
// NOTE(review): "ðernet_main" is entity-decoding corruption of
// "&ethernet_main" (the "&eth" prefix was decoded to U+00F0); fix at source.
1541 ethernet_main_t *em = ðernet_main;
1542 vnet_hw_interface_t *hi;
1543 vnet_sw_interface_t *si;
1544 main_intf_t *main_intf;
1545 vlan_table_t *vlan_table;
1546 qinq_table_t *qinq_table;
// Default result: no subint found.
1547 subint_config_t *subint = 0;
1549 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1551 if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1554 goto done; // non-ethernet interface
1557 // ensure there's an entry for the main intf (shouldn't really be necessary)
1558 vec_validate (em->main_intfs, hi->hw_if_index);
1559 main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1561 // Locate the subint for the given ethernet config
1562 si = vnet_get_sw_interface (vnm, sw_if_index);
// P2P ethernet: subints live in a dedicated pool keyed by client MAC.
1564 if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1566 p2p_ethernet_main_t *p2pm = &p2p_main;
1567 u32 p2pe_sw_if_index =
1568 p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
// Not found: allocate a fresh pool entry and remember its index.
1569 if (p2pe_sw_if_index == ~0)
1571 pool_get (p2pm->p2p_subif_pool, subint);
1572 si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1575 subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1576 *flags = SUBINT_CONFIG_P2P;
// Pipe interfaces carry their subint inside the pipe object.
1578 else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1582 pipe = pipe_get (sw_if_index);
1583 subint = &pipe->subint;
1584 *flags = SUBINT_CONFIG_P2P;
// Default subint catches any tagged packet not matched elsewhere.
1586 else if (si->sub.eth.flags.default_sub)
1588 subint = &main_intf->default_subint;
1589 *flags = SUBINT_CONFIG_MATCH_1_TAG |
1590 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1592 else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1594 // if no flags are set then this is a main interface
1595 // so treat as untagged
1596 subint = &main_intf->untagged_subint;
1597 *flags = SUBINT_CONFIG_MATCH_0_TAG;
// Tagged cases below: pick/allocate the per-main-intf VLAN table first.
1602 // first get the vlan table
1603 if (si->sub.eth.flags.dot1ad)
// A zero table index means "invalid" (slot 0 is reserved), so allocate.
1605 if (main_intf->dot1ad_vlans == 0)
1607 // Allocate a vlan table from the pool
1608 pool_get (em->vlan_pool, vlan_table);
1609 main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1613 // Get ptr to existing vlan table
1615 vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
// dot1q branch — same allocate-on-demand pattern.
1620 if (main_intf->dot1q_vlans == 0)
1622 // Allocate a vlan table from the pool
1623 pool_get (em->vlan_pool, vlan_table);
1624 main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1628 // Get ptr to existing vlan table
1630 vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
// Single-tag subinterface: exact match or "one or more tags".
1634 if (si->sub.eth.flags.one_tag)
1636 *flags = si->sub.eth.flags.exact_match ?
1637 SUBINT_CONFIG_MATCH_1_TAG :
1638 (SUBINT_CONFIG_MATCH_1_TAG |
1639 SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
// "any outer vlan" with one tag is not supported.
1641 if (si->sub.eth.flags.outer_vlan_id_any)
1643 // not implemented yet
1649 // a single vlan, a common case
1651 &vlan_table->vlans[si->sub.eth.
1652 outer_vlan_id].single_tag_subint;
// Two-tag subinterface: flags depend on exact_match as above.
1659 *flags = si->sub.eth.flags.exact_match ?
1660 SUBINT_CONFIG_MATCH_2_TAG :
1661 (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1663 if (si->sub.eth.flags.outer_vlan_id_any
1664 && si->sub.eth.flags.inner_vlan_id_any)
1666 // not implemented yet
1671 if (si->sub.eth.flags.inner_vlan_id_any)
1673 // a specific outer and "any" inner
1674 // don't need a qinq table for this
1676 &vlan_table->vlans[si->sub.eth.
1677 outer_vlan_id].inner_any_subint;
1678 if (si->sub.eth.flags.exact_match)
1680 *flags = SUBINT_CONFIG_MATCH_2_TAG;
1684 *flags = SUBINT_CONFIG_MATCH_2_TAG |
1685 SUBINT_CONFIG_MATCH_3_TAG;
1690 // a specific outer + specifc innner vlan id, a common case
1692 // get the qinq table
// qinqs == 0 means no table yet (slot 0 reserved as invalid).
1693 if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1695 // Allocate a qinq table from the pool
1696 pool_get (em->qinq_pool, qinq_table);
1697 vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1698 qinq_table - em->qinq_pool;
1702 // Get ptr to existing qinq table
1704 vec_elt_at_index (em->qinq_pool,
1705 vlan_table->vlans[si->sub.
1709 subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
/*
 * Admin up/down callback: record the sw_if_index in the subint config when
 * the interface goes up, or ~0 when it goes down, so the input path can tell
 * whether the subint is active.
 * NOTE(review): error-path lines and the return are elided in this view.
 */
1718 static clib_error_t *
1719 ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1721 subint_config_t *subint;
1724 clib_error_t *error = 0;
1726 // Find the config for this subinterface
1728 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1733 // not implemented yet or not ethernet
// ~0 marks the subint as down for the input-path lookup.
1737 subint->sw_if_index =
1738 ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1744 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1747 #ifndef CLIB_MARCH_VARIANT
1748 // Set the L2/L3 mode for the subinterface
/*
 * Toggle SUBINT_CONFIG_L2 on the subint located via
 * ethernet_sw_interface_get_config. For a port (non-sub interface) the tag
 * match flags are widened for L2 mode / narrowed for L3 mode, because an
 * L2 port accepts any tagging while an L3 main interface matches untagged
 * only.  NOTE(review): several lines (is_port conditional structure, return)
 * are elided in this view.
 */
1750 ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1752 subint_config_t *subint;
1756 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
// is_port == true when this is a main interface, not a subinterface.
1758 is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1760 // Find the config for this subinterface
1762 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1767 // unimplemented or not ethernet
1771 // Double check that the config we found is for our interface (or the interface is down)
1772 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
// L2 mode: mark and (for a port) match all tag counts.
1776 subint->flags |= SUBINT_CONFIG_L2;
1779 SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1780 | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
// L3 mode: clear the L2 bit and (for a port) match untagged only.
1784 subint->flags &= ~SUBINT_CONFIG_L2;
1787 ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
1788 | SUBINT_CONFIG_MATCH_3_TAG);
1796 * Set the L2/L3 mode for the subinterface regardless of port
/*
 * Like ethernet_sw_interface_set_l2_mode but never touches the tag-match
 * flags — only the SUBINT_CONFIG_L2 bit is set or cleared.
 * NOTE(review): error path and return are elided in this view.
 */
1799 ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
1800 u32 sw_if_index, u32 l2)
1802 subint_config_t *subint;
1806 /* Find the config for this subinterface */
1808 ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1813 /* unimplemented or not ethernet */
1818 * Double check that the config we found is for our interface (or the
1819 * interface is down)
1821 ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1825 subint->flags |= SUBINT_CONFIG_L2;
1829 subint->flags &= ~SUBINT_CONFIG_L2;
/*
 * Subinterface add/del callback. On create: locate (and implicitly allocate)
 * the subint config, reject duplicates ("vlan is already in use"), then mark
 * it valid with the computed tag-match flags. Interfaces start admin-down,
 * hence sw_if_index = ~0.
 * NOTE(review): the delete path and `done:`/return lines are elided in this
 * view (orig lines 1858-1867, 1879-1884 not visible).
 */
1837 static clib_error_t *
1838 ethernet_sw_interface_add_del (vnet_main_t * vnm,
1839 u32 sw_if_index, u32 is_create)
1841 clib_error_t *error = 0;
1842 subint_config_t *subint;
1844 u32 unsupported = 0;
1846 // Find the config for this subinterface
1848 ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
1853 // not implemented yet or not ethernet
1856 // this is the NYI case
1857 error = clib_error_return (0, "not implemented yet");
1868 // Initialize the subint
1869 if (subint->flags & SUBINT_CONFIG_VALID)
1871 // Error vlan already in use
1872 error = clib_error_return (0, "vlan is already in use");
1876 // Note that config is L3 by default
1877 subint->flags = SUBINT_CONFIG_VALID | match_flags;
1878 subint->sw_if_index = ~0; // because interfaces are initially down
1885 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
/*
 * Error-string table built from error.def via the X-macro pattern: each
 * ethernet_error(name, code, string) entry contributes its string.
 * NOTE(review): the closing "};" line is elided in this view.
 */
1887 static char *ethernet_error_strings[] = {
1888 #define ethernet_error(n,c,s) s,
1889 #include "error.def"
1890 #undef ethernet_error
/*
 * Graph-node registration for ethernet-input. The scalar_size makes room in
 * each frame for the ethernet_input_frame_t hint (single sw_if_index /
 * ip4-cksum-ok flags) used by the node function's fast path.
 * NOTE(review): next-node list terminator and closing "};" are elided here.
 */
1894 VLIB_REGISTER_NODE (ethernet_input_node) = {
1895 .name = "ethernet-input",
1896 /* Takes a vector of packets. */
1897 .vector_size = sizeof (u32),
1898 .scalar_size = sizeof (ethernet_input_frame_t),
1899 .n_errors = ETHERNET_N_ERROR,
1900 .error_strings = ethernet_error_strings,
1901 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
// Populate next_nodes from the foreach_ethernet_input_next X-macro.
1903 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1904 foreach_ethernet_input_next
1907 .format_buffer = format_ethernet_header_with_length,
1908 .format_trace = format_ethernet_input_trace,
1909 .unformat_buffer = unformat_ethernet_header,
/*
 * Graph-node registration for ethernet-input-type. Shares the same next-node
 * layout as ethernet-input (indices must align across the three nodes).
 * NOTE(review): trailing members / closing "};" are elided in this view.
 */
1912 VLIB_REGISTER_NODE (ethernet_input_type_node) = {
1913 .name = "ethernet-input-type",
1914 /* Takes a vector of packets. */
1915 .vector_size = sizeof (u32),
1916 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1918 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1919 foreach_ethernet_input_next
/*
 * Graph-node registration for ethernet-input-not-l2, again with the shared
 * next-node layout so cached next indices stay valid across all variants.
 * NOTE(review): trailing members / closing "};" are elided in this view.
 */
1924 VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
1925 .name = "ethernet-input-not-l2",
1926 /* Takes a vector of packets. */
1927 .vector_size = sizeof (u32),
1928 .n_next_nodes = ETHERNET_INPUT_N_NEXT,
1930 #define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
1931 foreach_ethernet_input_next
1937 #ifndef CLIB_MARCH_VARIANT
/*
 * Enable/disable redirecting a hardware interface's RX directly to
 * ethernet-input; ~0 restores the driver's default next node.
 */
1939 ethernet_set_rx_redirect (vnet_main_t * vnm,
1940 vnet_hw_interface_t * hi, u32 enable)
1942 // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
1943 // don't go directly to ip4-input)
1944 vnet_hw_interface_rx_redirect_to_node
1945 (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
1950 * Initialization and registration for the next_by_ethernet structure
/*
 * Initialize a next_by_ethertype_t: allocate the sparse ethertype->next-index
 * vector (indexed by the 16-bit ethertype), pre-size the inverse mapping for
 * the DROP and PUNT next indices, and mark both as invalid sparse indices.
 * The trailing ASSERT guards against an init-ordering bug where a
 * registration happened before this init ran.
 * NOTE(review): function header/braces and the conditional around the ASSERT
 * are partially elided in this view.
 */
1954 next_by_ethertype_init (next_by_ethertype_t * l3_next)
1956 l3_next->input_next_by_type = sparse_vec_new
1957 ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
1958 /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
1960 vec_validate (l3_next->sparse_index_by_input_next_index,
1961 ETHERNET_INPUT_NEXT_DROP);
1962 vec_validate (l3_next->sparse_index_by_input_next_index,
1963 ETHERNET_INPUT_NEXT_PUNT);
1964 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
1965 SPARSE_VEC_INVALID_INDEX;
1966 l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
1967 SPARSE_VEC_INVALID_INDEX;
1970 * Make sure we don't wipe out an ethernet registration by mistake
1971 * Can happen if init function ordering constraints are missing.
// NOTE(review): "ðernet_main" below is entity-decoding corruption of
// "&ethernet_main".
1975 ethernet_main_t *em = ðernet_main;
1976 ASSERT (em->next_by_ethertype_register_called == 0);
1982 // Add an ethertype -> next index mapping to the structure
/*
 * Register next_index as the handler for ethertype in l3_next's sparse
 * vector, rebuild the inverse (next-index -> sparse-index) mapping, and
 * refresh the cached IP4/IP6/MPLS fast-path next indices — unless L3
 * redirect is active, in which case the caches already point at the
 * redirect node and must not be overwritten.
 * NOTE(review): braces, the conditional around the register_called flag,
 * and the return are elided in this view.
 */
1984 next_by_ethertype_register (next_by_ethertype_t * l3_next,
1985 u32 ethertype, u32 next_index)
// NOTE(review): "ðernet_main" on the next two declarations is
// entity-decoding corruption of "&ethernet_main".
1989 ethernet_main_t *em = ðernet_main;
1993 ethernet_main_t *em = ðernet_main;
// Flag checked by next_by_ethertype_init's ordering ASSERT.
1994 em->next_by_ethertype_register_called = 1;
1997 /* Setup ethernet type -> next index sparse vector mapping. */
1998 n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2001 /* Rebuild next index -> sparse index inverse mapping when sparse vector
2003 vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
// Start at 1: sparse index 0 is the invalid entry.
2004 for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2006 sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2008 // do not allow the cached next index's to be updated if L3
2009 // redirect is enabled, as it will have overwritten them
2010 if (!em->redirect_l3)
2012 // Cache common ethertypes directly
2013 if (ethertype == ETHERNET_TYPE_IP4)
2015 l3_next->input_next_ip4 = next_index;
2017 else if (ethertype == ETHERNET_TYPE_IP6)
2019 l3_next->input_next_ip6 = next_index;
2021 else if (ethertype == ETHERNET_TYPE_MPLS)
2023 l3_next->input_next_mpls = next_index;
/*
 * One-time init for the ethernet input nodes: wire up the three node
 * variants, init the L3 dispatch table, pre-size the main-interface vector
 * and the VLAN/QinQ table pools, and reserve pool slot 0 of each pool as
 * the "invalid table" sentinel (so index 0 can mean "no table allocated").
 * NOTE(review): function header/braces are elided in this view.
 */
2030 ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2032 __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2033 __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2035 ethernet_setup_node (vm, ethernet_input_node.index);
2036 ethernet_setup_node (vm, ethernet_input_type_node.index);
2037 ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2039 next_by_ethertype_init (&em->l3_next);
2041 // Initialize pools and vector for vlan parsing
2042 vec_validate (em->main_intfs, 10); // 10 main interfaces
2043 pool_alloc (em->vlan_pool, 10);
2044 pool_alloc (em->qinq_pool, 1);
2046 // The first vlan pool will always be reserved for an invalid table
2047 pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0
2048 // The first qinq pool will always be reserved for an invalid table
2049 pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0
/*
 * Register node_index as the handler for an ethertype. Adds the node as a
 * next of all three ethernet input node variants and asserts the resulting
 * next indices are identical across them (the inline path caches a single
 * next index). Then records the mapping in the L3 dispatch structure and
 * propagates it to the L2 BVI path.
 * NOTE(review): braces and some error-path lines are elided in this view.
 */
2053 ethernet_register_input_type (vlib_main_t * vm,
2054 ethernet_type_t type, u32 node_index)
// NOTE(review): "ðernet_main" is entity-decoding corruption of
// "&ethernet_main".
2056 ethernet_main_t *em = ðernet_main;
2057 ethernet_type_info_t *ti;
// Make sure ethernet_init has run before touching its data structures.
2061 clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2063 clib_error_report (error);
2066 ti = ethernet_get_type_info (em, type);
2069 clib_warning ("type_info NULL for type %d", type);
2072 ti->node_index = node_index;
2073 ti->next_index = vlib_node_add_next (vm,
2074 ethernet_input_node.index, node_index);
2075 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
// Next indices must align across all three input node variants.
2076 ASSERT (i == ti->next_index);
2078 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2079 ASSERT (i == ti->next_index);
2081 // Add the L3 node for this ethertype to the next nodes structure
2082 next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2084 // Call the registration functions for other nodes that want a mapping
2085 l2bvi_register_input_type (vm, type, node_index);
/*
 * Register the single L2 next node (em->l2_next) used for all L2-mode
 * packets, adding the arc on all three input node variants and asserting
 * the next indices align.
 * NOTE(review): function header/braces and the em->l2_next assignment line
 * are elided in this view.
 */
2089 ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
// NOTE(review): "ðernet_main" is entity-decoding corruption of
// "&ethernet_main".
2091 ethernet_main_t *em = ðernet_main;
2095 vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2098 * Even if we never use these arcs, we have to align the next indices...
2100 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2102 ASSERT (i == em->l2_next);
2104 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2105 ASSERT (i == em->l2_next);
2108 // Register a next node for L3 redirect, and enable L3 redirect
/*
 * Enable L3 redirect: all IP4/IP6/MPLS traffic is sent to node_index instead
 * of the normally-registered L3 nodes, by overwriting the cached next
 * indices (next_by_ethertype_register deliberately skips updating them while
 * redirect_l3 is set). Arcs are added on all three input variants and must
 * yield the same next index.
 * NOTE(review): braces are elided in this view.
 */
2110 ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
// NOTE(review): "ðernet_main" is entity-decoding corruption of
// "&ethernet_main".
2112 ethernet_main_t *em = ðernet_main;
2115 em->redirect_l3 = 1;
2116 em->redirect_l3_next = vlib_node_add_next (vm,
2117 ethernet_input_node.index,
2120 * Change the cached next nodes to the redirect node
2122 em->l3_next.input_next_ip4 = em->redirect_l3_next;
2123 em->l3_next.input_next_ip6 = em->redirect_l3_next;
2124 em->l3_next.input_next_mpls = em->redirect_l3_next;
2127 * Even if we never use these arcs, we have to align the next indices...
2129 i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2131 ASSERT (i == em->redirect_l3_next);
2133 i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2135 ASSERT (i == em->redirect_l3_next);
2140 * fd.io coding-style-patch-verification: ON
2143 * eval: (c-set-style "gnu")