2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <vlib/vlib.h>
16 #include <vnet/vnet.h>
17 #include <vnet/pg/pg.h>
18 #include <vnet/ethernet/ethernet.h>
19 #include <vppinfra/error.h>
20 #include <sample/sample.h>
31 /* packet trace format function */
/*
 * Trace formatter for this node.  Pulls a vlib_main_t *, a vlib_node_t *
 * and a sample_trace_t * off the va_list (the first two are unused) and
 * appends the trace record's sw_if_index, next index and recorded
 * new source / destination MAC addresses to the vector s.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the updated s is returned -- confirm against the full file.
 */
33 format_sample_trace (u8 * s, va_list * args)
/* vm and node must still be consumed so the trace record is read next. */
35 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37 sample_trace_t *t = va_arg (*args, sample_trace_t *);
/* First line: interface and next index; second line: the two MACs. */
39 s = format (s, "SAMPLE: sw_if_index %d, next index %d\n",
40 t->sw_if_index, t->next_index);
41 s = format (s, " new src %U -> new dst %U",
42 format_mac_address, t->new_src_mac,
43 format_mac_address, t->new_dst_mac);
/* Node registration object; its .index is used when incrementing counters. */
48 vlib_node_registration_t sample_node;
/* X-macro list of error counters: each entry is _(symbol, description). */
50 #define foreach_sample_error \
51 _(SWAPPED, "Mac swap packets processed")
/* Maps a list entry to its SAMPLE_ERROR_<symbol> enumerator. */
55 #define _(sym,str) SAMPLE_ERROR_##sym,
/*
 * Counter description strings.
 * NOTE(review): the line expanding foreach_sample_error into this table is
 * not visible in this excerpt -- confirm against the full file.
 */
61 static char *sample_error_strings[] = {
62 #define _(sym,string) string,
/* Next-node arc that every node function variant assigns to its packets. */
69 SAMPLE_NEXT_INTERFACE_OUTPUT,
74 * Simple dual/single loop version, default version which will compile
77 * Node costs 30 clocks/pkt at a vector size of 51
/*
 * X-macro invoked with a caller-supplied _(a) to apply one statement per
 * MAC address byte offset a.
 * NOTE(review): the offset list itself is not visible in this excerpt;
 * presumably one _(n) per byte of a 6-byte MAC -- confirm.
 */
82 #define foreach_mac_address_offset \
/*
 * Node dispatch function, dual/single loop variant.
 *
 * For every buffer index in the frame: swaps the ethernet source and
 * destination MAC addresses byte by byte, sets the TX sw_if_index to the
 * RX sw_if_index, optionally records a trace, and enqueues the packet to
 * SAMPLE_NEXT_INTERFACE_OUTPUT.  Returns frame->n_vectors.
 *
 * NOTE(review): this excerpt omits several lines (braces, the declarations
 * of bi0/bi1/tmp0/tmp1/pkts_swapped/n_left_to_next, and the statements
 * that advance from/to_next); confirm details against the full file.
 */
91 sample_node_fn (vlib_main_t * vm,
92 vlib_node_runtime_t * node, vlib_frame_t * frame)
94 u32 n_left_from, *from, *to_next;
95 sample_next_t next_index;
/* from walks the frame's buffer indices; start with the cached next arc. */
98 from = vlib_frame_vector_args (frame);
99 n_left_from = frame->n_vectors;
100 next_index = node->cached_next_index;
102 while (n_left_from > 0)
106 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/*
 * Dual loop: two packets per iteration.  Requires at least four remaining
 * so that from[2] and from[3] can be prefetched for the next iteration.
 */
108 while (n_left_from >= 4 && n_left_to_next >= 2)
110 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
111 u32 next1 = SAMPLE_NEXT_INTERFACE_OUTPUT;
112 u32 sw_if_index0, sw_if_index1;
114 ethernet_header_t *en0, *en1;
116 vlib_buffer_t *b0, *b1;
118 /* Prefetch next iteration. */
120 vlib_buffer_t *p2, *p3;
122 p2 = vlib_get_buffer (vm, from[2]);
123 p3 = vlib_get_buffer (vm, from[3]);
125 vlib_prefetch_buffer_header (p2, LOAD);
126 vlib_prefetch_buffer_header (p3, LOAD);
128 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
129 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
132 /* speculatively enqueue b0 and b1 to the current next frame */
133 to_next[0] = bi0 = from[0];
134 to_next[1] = bi1 = from[1];
140 b0 = vlib_get_buffer (vm, bi0);
141 b1 = vlib_get_buffer (vm, bi1);
/* Both packets are expected to start at buffer offset 0. */
143 ASSERT (b0->current_data == 0);
144 ASSERT (b1->current_data == 0);
146 en0 = vlib_buffer_get_current (b0);
147 en1 = vlib_buffer_get_current (b1);
149 /* This is not the fastest way to swap src + dst mac addresses */
/* Three passes per packet: src -> tmp, dst -> src, tmp -> dst. */
150 #define _(a) tmp0[a] = en0->src_address[a];
151 foreach_mac_address_offset;
153 #define _(a) en0->src_address[a] = en0->dst_address[a];
154 foreach_mac_address_offset;
156 #define _(a) en0->dst_address[a] = tmp0[a];
157 foreach_mac_address_offset;
160 #define _(a) tmp1[a] = en1->src_address[a];
161 foreach_mac_address_offset;
163 #define _(a) en1->src_address[a] = en1->dst_address[a];
164 foreach_mac_address_offset;
166 #define _(a) en1->dst_address[a] = tmp1[a];
167 foreach_mac_address_offset;
170 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
171 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
173 /* Send pkt back out the RX interface */
174 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
175 vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
/* Check the node-level trace flag once, then each buffer's own flag. */
179 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
181 if (b0->flags & VLIB_BUFFER_IS_TRACED)
184 vlib_add_trace (vm, node, b0, sizeof (*t));
185 t->sw_if_index = sw_if_index0;
186 t->next_index = next0;
187 clib_memcpy (t->new_src_mac, en0->src_address,
188 sizeof (t->new_src_mac));
189 clib_memcpy (t->new_dst_mac, en0->dst_address,
190 sizeof (t->new_dst_mac));
193 if (b1->flags & VLIB_BUFFER_IS_TRACED)
196 vlib_add_trace (vm, node, b1, sizeof (*t));
197 t->sw_if_index = sw_if_index1;
198 t->next_index = next1;
199 clib_memcpy (t->new_src_mac, en1->src_address,
200 sizeof (t->new_src_mac));
201 clib_memcpy (t->new_dst_mac, en1->dst_address,
202 sizeof (t->new_dst_mac));
206 /* verify speculative enqueues, maybe switch current next frame */
207 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
208 to_next, n_left_to_next,
209 bi0, bi1, next0, next1);
/* Single loop: one packet per iteration for whatever remains. */
212 while (n_left_from > 0 && n_left_to_next > 0)
216 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
219 ethernet_header_t *en0;
221 /* speculatively enqueue b0 to the current next frame */
229 b0 = vlib_get_buffer (vm, bi0);
231 * Direct from the driver, we should be at offset 0
232 * aka at &b0->data[0]
234 ASSERT (b0->current_data == 0);
236 en0 = vlib_buffer_get_current (b0);
238 /* This is not the fastest way to swap src + dst mac addresses */
239 #define _(a) tmp0[a] = en0->src_address[a];
240 foreach_mac_address_offset;
242 #define _(a) en0->src_address[a] = en0->dst_address[a];
243 foreach_mac_address_offset;
245 #define _(a) en0->dst_address[a] = tmp0[a];
246 foreach_mac_address_offset;
249 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
251 /* Send pkt back out the RX interface */
252 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
254 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
255 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
257 sample_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
258 t->sw_if_index = sw_if_index0;
259 t->next_index = next0;
260 clib_memcpy (t->new_src_mac, en0->src_address,
261 sizeof (t->new_src_mac));
262 clib_memcpy (t->new_dst_mac, en0->dst_address,
263 sizeof (t->new_dst_mac));
268 /* verify speculative enqueue, maybe switch current next frame */
269 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
270 to_next, n_left_to_next,
274 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Add pkts_swapped to sample_node's SWAPPED counter. */
277 vlib_node_increment_counter (vm, sample_node.index,
278 SAMPLE_ERROR_SWAPPED, pkts_swapped);
279 return frame->n_vectors;
284 * This version swaps mac addresses using a 16-byte (u8x16) SIMD vector shuffle
285 * Node costs about 17 clocks/pkt at a vector size of 26
/*
 * Node dispatch function, vector-shuffle variant.
 *
 * Same dual/single loop structure as the byte-wise variant, but the MAC
 * swap is one 16-byte load from the start of the ethernet header, a
 * u8x16_shuffle with the swapmac mask, and one 16-byte store.  Each packet
 * gets TX sw_if_index = RX sw_if_index, is optionally traced, and goes to
 * SAMPLE_NEXT_INTERFACE_OUTPUT.  Returns frame->n_vectors.
 *
 * NOTE(review): the (u8x16 *) casts assume the header pointer is suitably
 * aligned for a 16-byte access; the ASSERTs on current_data == 0 are the
 * only visible guard -- confirm.
 * NOTE(review): this excerpt omits several lines (braces, some local
 * declarations, pointer advances); confirm details against the full file.
 */
289 sample_node_fn (vlib_main_t * vm,
290 vlib_node_runtime_t * node, vlib_frame_t * frame)
292 u32 n_left_from, *from, *to_next;
293 sample_next_t next_index;
294 u32 pkts_swapped = 0;
295 /* Vector shuffle mask to swap src, dst */
/* Mask picks source bytes 6-11, then 0-5; indices 12-15 stay in place. */
296 u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
298 from = vlib_frame_vector_args (frame);
299 n_left_from = frame->n_vectors;
300 next_index = node->cached_next_index;
302 while (n_left_from > 0)
306 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: needs four packets left so from[2]/from[3] can be prefetched. */
307 while (n_left_from >= 4 && n_left_to_next >= 2)
309 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
310 u32 next1 = SAMPLE_NEXT_INTERFACE_OUTPUT;
311 u32 sw_if_index0, sw_if_index1;
312 u8x16 src_dst0, src_dst1;
313 ethernet_header_t *en0, *en1;
315 vlib_buffer_t *b0, *b1;
317 /* Prefetch next iteration. */
319 vlib_buffer_t *p2, *p3;
321 p2 = vlib_get_buffer (vm, from[2]);
322 p3 = vlib_get_buffer (vm, from[3]);
324 vlib_prefetch_buffer_header (p2, LOAD);
325 vlib_prefetch_buffer_header (p3, LOAD);
327 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
328 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
331 /* speculatively enqueue b0 and b1 to the current next frame */
332 to_next[0] = bi0 = from[0];
333 to_next[1] = bi1 = from[1];
339 b0 = vlib_get_buffer (vm, bi0);
340 b1 = vlib_get_buffer (vm, bi1);
342 ASSERT (b0->current_data == 0);
343 ASSERT (b1->current_data == 0);
345 en0 = vlib_buffer_get_current (b0);
346 en1 = vlib_buffer_get_current (b1);
/* Load the first 16 header bytes, shuffle them, and write them back. */
348 src_dst0 = ((u8x16 *) en0)[0];
349 src_dst1 = ((u8x16 *) en1)[0];
350 src_dst0 = u8x16_shuffle (src_dst0, swapmac);
351 src_dst1 = u8x16_shuffle (src_dst1, swapmac);
352 ((u8x16 *) en0)[0] = src_dst0;
353 ((u8x16 *) en1)[0] = src_dst1;
355 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
356 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
358 /* Send pkt back out the RX interface */
359 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
360 vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
/* Check the node-level trace flag once, then each buffer's own flag. */
364 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
366 if (b0->flags & VLIB_BUFFER_IS_TRACED)
369 vlib_add_trace (vm, node, b0, sizeof (*t));
370 t->sw_if_index = sw_if_index0;
371 t->next_index = next0;
372 clib_memcpy (t->new_src_mac, en0->src_address,
373 sizeof (t->new_src_mac));
374 clib_memcpy (t->new_dst_mac, en0->dst_address,
375 sizeof (t->new_dst_mac));
378 if (b1->flags & VLIB_BUFFER_IS_TRACED)
381 vlib_add_trace (vm, node, b1, sizeof (*t));
382 t->sw_if_index = sw_if_index1;
383 t->next_index = next1;
384 clib_memcpy (t->new_src_mac, en1->src_address,
385 sizeof (t->new_src_mac));
386 clib_memcpy (t->new_dst_mac, en1->dst_address,
387 sizeof (t->new_dst_mac));
391 /* verify speculative enqueues, maybe switch current next frame */
392 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
393 to_next, n_left_to_next,
394 bi0, bi1, next0, next1);
/* Single loop: one packet per iteration for whatever remains. */
397 while (n_left_from > 0 && n_left_to_next > 0)
401 u32 next0 = SAMPLE_NEXT_INTERFACE_OUTPUT;
404 ethernet_header_t *en0;
406 /* speculatively enqueue b0 to the current next frame */
414 b0 = vlib_get_buffer (vm, bi0);
416 * Direct from the driver, we should be at offset 0
417 * aka at &b0->data[0]
419 ASSERT (b0->current_data == 0);
421 en0 = vlib_buffer_get_current (b0);
422 src_dst0 = ((u8x16 *) en0)[0];
423 src_dst0 = u8x16_shuffle (src_dst0, swapmac);
424 ((u8x16 *) en0)[0] = src_dst0;
426 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
428 /* Send pkt back out the RX interface */
429 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
431 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
432 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
434 sample_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
435 t->sw_if_index = sw_if_index0;
436 t->next_index = next0;
437 clib_memcpy (t->new_src_mac, en0->src_address,
438 sizeof (t->new_src_mac));
439 clib_memcpy (t->new_dst_mac, en0->dst_address,
440 sizeof (t->new_dst_mac));
445 /* verify speculative enqueue, maybe switch current next frame */
446 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
447 to_next, n_left_to_next,
451 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Add pkts_swapped to sample_node's SWAPPED counter. */
454 vlib_node_increment_counter (vm, sample_node.index,
455 SAMPLE_ERROR_SWAPPED, pkts_swapped);
456 return frame->n_vectors;
462 * This version computes all of the buffer pointers in
463 * one motion, uses a quad/single loop model, and
464 * traces the entire frame in one motion.
466 * Node costs about 16 clocks/pkt at a vector size of 26
468 * Some compilation drama with u8x16_shuffle, so turned off by
/* For this variant, u8x16_shuffle is mapped onto __builtin_shuffle. */
474 #define u8x16_shuffle __builtin_shuffle
475 /* This would normally be a stack local, but since it's a constant... */
/*
 * Zero-initialized next-index array, one u16 per possible frame slot.
 * It is passed to vlib_buffer_enqueue_to_next so every packet takes arc 0.
 */
476 static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };
/*
 * Node dispatch function, quad/single loop variant.
 *
 * Converts all buffer indices to pointers up front with vlib_get_buffers,
 * swaps the MAC addresses with u8x16_shuffle four packets at a time (then
 * one at a time), sets TX sw_if_index = RX sw_if_index, enqueues the frame
 * with a single vlib_buffer_enqueue_to_next call using the all-zero nexts
 * array, and finally records traces.  Returns frame->n_vectors.
 *
 * NOTE(review): unlike the other variants there is no visible ASSERT on
 * current_data here; the (u8x16 *) casts on vlib_buffer_get_current()
 * assume the pointer is suitably aligned for a 16-byte access -- confirm.
 * NOTE(review): this excerpt omits several lines (braces, the b = bufs
 * assignment, pointer/counter advances); confirm against the full file.
 */
479 sample_node_fn (vlib_main_t * vm,
480 vlib_node_runtime_t * node, vlib_frame_t * frame)
482 u32 n_left_from, *from;
483 u32 pkts_swapped = 0;
484 /* Vector shuffle mask to swap src, dst */
485 u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
486 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
487 /* See comment below about sending all pkts to the same place... */
488 u16 *next __attribute__ ((unused));
490 from = vlib_frame_vector_args (frame);
491 n_left_from = frame->n_vectors;
/* Fill bufs[] from the frame's buffer indices in one call. */
493 vlib_get_buffers (vm, from, bufs, n_left_from);
498 * We send all pkts to SAMPLE_NEXT_INTERFACE_OUTPUT, aka
499 * graph arc 0. So the usual setting of next[0...3] is commented
/* Quad loop: four packets per iteration. */
503 while (n_left_from >= 4)
505 u8x16 src_dst0, src_dst1, src_dst2, src_dst3;
506 /* Prefetch next iteration. */
507 if (PREDICT_TRUE (n_left_from >= 8))
509 vlib_prefetch_buffer_header (b[4], STORE);
510 vlib_prefetch_buffer_header (b[5], STORE);
511 vlib_prefetch_buffer_header (b[6], STORE);
512 vlib_prefetch_buffer_header (b[7], STORE);
513 CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, STORE);
514 CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, STORE);
515 CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, STORE);
516 CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, STORE);
/* Load the first 16 bytes of each packet, shuffle, then store back. */
519 src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
520 src_dst1 = ((u8x16 *) vlib_buffer_get_current (b[1]))[0];
521 src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
522 src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];
524 src_dst0 = u8x16_shuffle (src_dst0, swapmac);
525 src_dst1 = u8x16_shuffle (src_dst1, swapmac);
526 src_dst2 = u8x16_shuffle (src_dst2, swapmac);
527 src_dst3 = u8x16_shuffle (src_dst3, swapmac);
529 ((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
530 ((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
531 ((u8x16 *) vlib_buffer_get_current (b[2]))[0] = src_dst2;
532 ((u8x16 *) vlib_buffer_get_current (b[3]))[0] = src_dst3;
/* Transmit each packet on the interface it was received on. */
534 vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
535 vnet_buffer (b[0])->sw_if_index[VLIB_RX];
536 vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
537 vnet_buffer (b[1])->sw_if_index[VLIB_RX];
538 vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
539 vnet_buffer (b[2])->sw_if_index[VLIB_RX];
540 vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
541 vnet_buffer (b[3])->sw_if_index[VLIB_RX];
543 // next[0] = SAMPLE_NEXT_INTERFACE_OUTPUT;
544 // next[1] = SAMPLE_NEXT_INTERFACE_OUTPUT;
545 // next[2] = SAMPLE_NEXT_INTERFACE_OUTPUT;
546 // next[3] = SAMPLE_NEXT_INTERFACE_OUTPUT;
/* Single loop: one packet per iteration for whatever remains. */
554 while (n_left_from > 0)
557 src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
558 src_dst0 = u8x16_shuffle (src_dst0, swapmac);
559 ((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
560 vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
561 vnet_buffer (b[0])->sw_if_index[VLIB_RX];
562 // next[0] = SAMPLE_NEXT_INTERFACE_OUTPUT;
/*
 * Enqueue the frame in one call using the shared nexts array.
 * NOTE(review): the (u16 *) cast drops the const qualifier of nexts.
 */
570 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
573 vlib_node_increment_counter (vm, sample_node.index,
574 SAMPLE_ERROR_SWAPPED, pkts_swapped);
/*
 * Tracing runs as a separate pass after the enqueue.
 * NOTE(review): the loop body always reads b[0]; the reset and advance of
 * b are not visible in this excerpt -- verify that b is rewound to bufs
 * and stepped once per iteration.
 */
576 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
581 for (i = 0; i < frame->n_vectors; i++)
583 if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
585 ethernet_header_t *en;
587 vlib_add_trace (vm, node, b[0], sizeof (*t));
588 t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
589 t->next_index = SAMPLE_NEXT_INTERFACE_OUTPUT;
590 en = vlib_buffer_get_current (b[0]);
591 clib_memcpy (t->new_src_mac, en->src_address,
592 sizeof (t->new_src_mac));
593 clib_memcpy (t->new_dst_mac, en->dst_address,
594 sizeof (t->new_dst_mac));
601 return frame->n_vectors;
/*
 * Registration of sample_node: sample_node_fn is the dispatch function,
 * format_sample_trace formats its trace records, sample_error_strings
 * supplies the counter descriptions, and the SAMPLE_NEXT_INTERFACE_OUTPUT
 * arc leads to the "interface-output" node.
 */
606 VLIB_REGISTER_NODE (sample_node) =
608 .function = sample_node_fn,
/* Frame vector elements are u32-sized (the node functions read u32 *from). */
610 .vector_size = sizeof (u32),
611 .format_trace = format_sample_trace,
612 .type = VLIB_NODE_TYPE_INTERNAL,
614 .n_errors = ARRAY_LEN(sample_error_strings),
615 .error_strings = sample_error_strings,
617 .n_next_nodes = SAMPLE_N_NEXT,
619 /* edit / add dispositions here */
621 [SAMPLE_NEXT_INTERFACE_OUTPUT] = "interface-output",
/*
 * NOTE(review): macro is defined outside this file; presumably it emits
 * per-CPU-architecture variants of sample_node_fn -- confirm.
 */
626 VLIB_NODE_FUNCTION_MULTIARCH (sample_node, sample_node_fn);
629 * fd.io coding-style-patch-verification: ON
632 * eval: (c-set-style "gnu")