fib: Source Address Selection
[vpp.git] / src / vnet / interface_output.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * interface_output.c: interface output node
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/icmp46_packet.h>
42 #include <vnet/ethernet/packet.h>
43 #include <vnet/ip/format.h>
44 #include <vnet/ip/ip4.h>
45 #include <vnet/ip/ip6.h>
46 #include <vnet/udp/udp_packet.h>
47 #include <vnet/feature/feature.h>
48 #include <vnet/classify/trace_classify.h>
49 #include <vnet/interface_output.h>
50
/* Per-packet trace record written by vnet_interface_output_trace() and
   rendered by format_vnet_interface_output_trace(). */
51 typedef struct
52 {
53   u32 sw_if_index;		/* buffer's sw_if_index[VLIB_TX] at capture time */
54   u32 flags;			/* copy of the buffer's flags word */
55   u8 data[128 - 2 * sizeof (u32)];	/* leading packet bytes; sized so the three fields add up to 128 bytes */
56 }
57 interface_output_trace_t;
58
59 #ifndef CLIB_MARCH_VARIANT
60 u8 *
61 format_vnet_interface_output_trace (u8 * s, va_list * va)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
64   vlib_node_t *node = va_arg (*va, vlib_node_t *);
65   interface_output_trace_t *t = va_arg (*va, interface_output_trace_t *);
66   vnet_main_t *vnm = vnet_get_main ();
67   vnet_sw_interface_t *si;
68   u32 indent;
69
70   if (t->sw_if_index != (u32) ~ 0)
71     {
72       indent = format_get_indent (s);
73
74       if (pool_is_free_index
75           (vnm->interface_main.sw_interfaces, t->sw_if_index))
76         {
77           /* the interface may have been deleted by the time the trace is printed */
78           s = format (s, "sw_if_index: %d ", t->sw_if_index);
79         }
80       else
81         {
82           si = vnet_get_sw_interface (vnm, t->sw_if_index);
83           s =
84             format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
85                     t->flags);
86         }
87       s =
88         format (s, "\n%U%U", format_white_space, indent,
89                 node->format_buffer ? node->format_buffer : format_hex_bytes,
90                 t->data, sizeof (t->data));
91     }
92   return s;
93 }
94 #endif /* CLIB_MARCH_VARIANT */
95
96 static void
97 vnet_interface_output_trace (vlib_main_t * vm,
98                              vlib_node_runtime_t * node,
99                              vlib_frame_t * frame, uword n_buffers)
100 {
101   u32 n_left, *from;
102
103   n_left = n_buffers;
104   from = vlib_frame_vector_args (frame);
105
106   while (n_left >= 4)
107     {
108       u32 bi0, bi1;
109       vlib_buffer_t *b0, *b1;
110       interface_output_trace_t *t0, *t1;
111
112       /* Prefetch next iteration. */
113       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
114       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
115
116       bi0 = from[0];
117       bi1 = from[1];
118
119       b0 = vlib_get_buffer (vm, bi0);
120       b1 = vlib_get_buffer (vm, bi1);
121
122       if (b0->flags & VLIB_BUFFER_IS_TRACED)
123         {
124           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
125           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
126           t0->flags = b0->flags;
127           clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
128                             sizeof (t0->data));
129         }
130       if (b1->flags & VLIB_BUFFER_IS_TRACED)
131         {
132           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
133           t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
134           t1->flags = b1->flags;
135           clib_memcpy_fast (t1->data, vlib_buffer_get_current (b1),
136                             sizeof (t1->data));
137         }
138       from += 2;
139       n_left -= 2;
140     }
141
142   while (n_left >= 1)
143     {
144       u32 bi0;
145       vlib_buffer_t *b0;
146       interface_output_trace_t *t0;
147
148       bi0 = from[0];
149
150       b0 = vlib_get_buffer (vm, bi0);
151
152       if (b0->flags & VLIB_BUFFER_IS_TRACED)
153         {
154           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
155           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
156           t0->flags = b0->flags;
157           clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
158                             sizeof (t0->data));
159         }
160       from += 1;
161       n_left -= 1;
162     }
163 }
164
/*
 * Common body of the per-interface output node.
 *
 * Copies every buffer index of `frame` into frames for `next_index`, which
 * is VNET_INTERFACE_OUTPUT_NEXT_TX unless the interface-output feature arc
 * has features enabled for rt->sw_if_index, in which case the next index
 * and config index come from the feature configuration.
 *
 * Before forwarding:
 *  - buffers are traced when the node has VLIB_NODE_FLAG_TRACE set;
 *  - the whole frame is dropped (INTERFACE_DELETED) when the node runtime
 *    is marked deleted;
 *  - the whole frame is dropped (INTERFACE_DOWN), and the TX_ERROR simple
 *    counter bumped by n_buffers, when the sw interface is not admin up or
 *    the hw interface has no link.
 *
 * While forwarding, packets and chain byte lengths are summed and added to
 * the combined TX counter of rt->sw_if_index at the end; buffers whose
 * sw_if_index[VLIB_TX] differs from rt->sw_if_index are additionally
 * counted one by one against that other index.
 *
 * do_tx_offloads: when non-zero, vnet_calc_checksums_inline() is called for
 * buffers carrying any of the TCP/UDP/IP checksum offload flags.
 *
 * NOTE(review): the `hi` argument is overwritten by a fresh lookup before
 * its first use, so the value passed by the caller is not consulted.
 *
 * Returns n_buffers (frame->n_vectors) on the forwarding path, or the
 * result of vlib_error_drop_buffers() on the drop paths.
 */
165 static_always_inline uword
166 vnet_interface_output_node_inline (vlib_main_t * vm,
167                                    vlib_node_runtime_t * node,
168                                    vlib_frame_t * frame,
169                                    vnet_main_t * vnm,
170                                    vnet_hw_interface_t * hi,
171                                    int do_tx_offloads)
172 {
173   vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
174   vnet_sw_interface_t *si;
175   u32 n_left_to_tx, *from, *from_end, *to_tx;
176   u32 n_bytes, n_buffers, n_packets;
177   u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3;
178   u32 thread_index = vm->thread_index;
179   vnet_interface_main_t *im = &vnm->interface_main;
180   u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
181   u32 current_config_index = ~0;
182   u8 arc = im->output_feature_arc_index;
183   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
184
185   n_buffers = frame->n_vectors;
186
187   if (node->flags & VLIB_NODE_FLAG_TRACE)
188     vnet_interface_output_trace (vm, node, frame, n_buffers);
189
      /* Translate all buffer indices to pointers up front; `b` walks this
         array in lock-step with `from` in the loops below. */
190   from = vlib_frame_vector_args (frame);
191   vlib_get_buffers (vm, from, b, n_buffers);
192
193   if (rt->is_deleted)
194     return vlib_error_drop_buffers (vm, node, from,
195                                     /* buffer stride */ 1,
196                                     n_buffers,
197                                     VNET_INTERFACE_OUTPUT_NEXT_DROP,
198                                     node->node_index,
199                                     VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED);
200
201   si = vnet_get_sw_interface (vnm, rt->sw_if_index);
202   hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
203   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ||
204       !(hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
205     {
206       vlib_simple_counter_main_t *cm;
207
208       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
209                              VNET_INTERFACE_COUNTER_TX_ERROR);
210       vlib_increment_simple_counter (cm, thread_index,
211                                      rt->sw_if_index, n_buffers);
212
213       return vlib_error_drop_buffers (vm, node, from,
214                                       /* buffer stride */ 1,
215                                       n_buffers,
216                                       VNET_INTERFACE_OUTPUT_NEXT_DROP,
217                                       node->node_index,
218                                       VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN);
219     }
220
221   from_end = from + n_buffers;
222
223   /* Total byte count of all buffers. */
224   n_bytes = 0;
225   n_packets = 0;
226
227   /* interface-output feature arc handling */
228   if (PREDICT_FALSE (vnet_have_features (arc, rt->sw_if_index)))
229     {
230       vnet_feature_config_main_t *fcm;
231       fcm = vnet_feature_get_config_main (arc);
232       current_config_index = vnet_get_feature_config_index (arc,
233                                                             rt->sw_if_index);
234       vnet_get_config_data (&fcm->config_main, &current_config_index,
235                             &next_index, 0);
236     }
237
238   while (from < from_end)
239     {
240       /* Get new next frame since previous incomplete frame may have less
241          than VLIB_FRAME_SIZE vectors in it. */
242       vlib_get_new_next_frame (vm, node, next_index, to_tx, n_left_to_tx);
243
          /* Quad loop: requires 8 buffers remaining so that b[4..7] are
             valid prefetch targets. */
244       while (from + 8 <= from_end && n_left_to_tx >= 4)
245         {
246           u32 bi0, bi1, bi2, bi3;
247           u32 tx_swif0, tx_swif1, tx_swif2, tx_swif3;
248           u32 or_flags;
249
250           /* Prefetch next iteration. */
251           vlib_prefetch_buffer_header (b[4], LOAD);
252           vlib_prefetch_buffer_header (b[5], LOAD);
253           vlib_prefetch_buffer_header (b[6], LOAD);
254           vlib_prefetch_buffer_header (b[7], LOAD);
255
256           bi0 = from[0];
257           bi1 = from[1];
258           bi2 = from[2];
259           bi3 = from[3];
260           to_tx[0] = bi0;
261           to_tx[1] = bi1;
262           to_tx[2] = bi2;
263           to_tx[3] = bi3;
264
              /* OR of all four flag words: lets the offload path below skip
                 the per-buffer tests when no buffer asks for a checksum. */
265           or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
266
267           from += 4;
268           to_tx += 4;
269           n_left_to_tx -= 4;
270
271           /* Be grumpy about zero length buffers for benefit of
272              driver tx function. */
273           ASSERT (b[0]->current_length > 0);
274           ASSERT (b[1]->current_length > 0);
275           ASSERT (b[2]->current_length > 0);
276           ASSERT (b[3]->current_length > 0);
277
278           n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
279           n_bytes_b1 = vlib_buffer_length_in_chain (vm, b[1]);
280           n_bytes_b2 = vlib_buffer_length_in_chain (vm, b[2]);
281           n_bytes_b3 = vlib_buffer_length_in_chain (vm, b[3]);
282           tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
283           tx_swif1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
284           tx_swif2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
285           tx_swif3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
286
287           n_bytes += n_bytes_b0 + n_bytes_b1;
288           n_bytes += n_bytes_b2 + n_bytes_b3;
289           n_packets += 4;
290
              /* Only set when the feature arc lookup above ran. */
291           if (PREDICT_FALSE (current_config_index != ~0))
292             {
293               vnet_buffer (b[0])->feature_arc_index = arc;
294               vnet_buffer (b[1])->feature_arc_index = arc;
295               vnet_buffer (b[2])->feature_arc_index = arc;
296               vnet_buffer (b[3])->feature_arc_index = arc;
297               b[0]->current_config_index = current_config_index;
298               b[1]->current_config_index = current_config_index;
299               b[2]->current_config_index = current_config_index;
300               b[3]->current_config_index = current_config_index;
301             }
302
303           /* update vlan subif tx counts, if required */
304           if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
305             {
306               vlib_increment_combined_counter (im->combined_sw_if_counters +
307                                                VNET_INTERFACE_COUNTER_TX,
308                                                thread_index, tx_swif0, 1,
309                                                n_bytes_b0);
310             }
311
312           if (PREDICT_FALSE (tx_swif1 != rt->sw_if_index))
313             {
314
315               vlib_increment_combined_counter (im->combined_sw_if_counters +
316                                                VNET_INTERFACE_COUNTER_TX,
317                                                thread_index, tx_swif1, 1,
318                                                n_bytes_b1);
319             }
320
321           if (PREDICT_FALSE (tx_swif2 != rt->sw_if_index))
322             {
323
324               vlib_increment_combined_counter (im->combined_sw_if_counters +
325                                                VNET_INTERFACE_COUNTER_TX,
326                                                thread_index, tx_swif2, 1,
327                                                n_bytes_b2);
328             }
329           if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index))
330             {
331
332               vlib_increment_combined_counter (im->combined_sw_if_counters +
333                                                VNET_INTERFACE_COUNTER_TX,
334                                                thread_index, tx_swif3, 1,
335                                                n_bytes_b3);
336             }
337
338           if (do_tx_offloads)
339             {
340               u32 vnet_buffer_offload_flags =
341                 (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
342                  VNET_BUFFER_F_OFFLOAD_UDP_CKSUM |
343                  VNET_BUFFER_F_OFFLOAD_IP_CKSUM);
344               if (or_flags & vnet_buffer_offload_flags)
345                 {
346                   if (b[0]->flags & vnet_buffer_offload_flags)
347                     vnet_calc_checksums_inline
348                       (vm, b[0],
349                        b[0]->flags & VNET_BUFFER_F_IS_IP4,
350                        b[0]->flags & VNET_BUFFER_F_IS_IP6);
351                   if (b[1]->flags & vnet_buffer_offload_flags)
352                     vnet_calc_checksums_inline
353                       (vm, b[1],
354                        b[1]->flags & VNET_BUFFER_F_IS_IP4,
355                        b[1]->flags & VNET_BUFFER_F_IS_IP6);
356                   if (b[2]->flags & vnet_buffer_offload_flags)
357                     vnet_calc_checksums_inline
358                       (vm, b[2],
359                        b[2]->flags & VNET_BUFFER_F_IS_IP4,
360                        b[2]->flags & VNET_BUFFER_F_IS_IP6);
361                   if (b[3]->flags & vnet_buffer_offload_flags)
362                     vnet_calc_checksums_inline
363                       (vm, b[3],
364                        b[3]->flags & VNET_BUFFER_F_IS_IP4,
365                        b[3]->flags & VNET_BUFFER_F_IS_IP6);
366                 }
367             }
368           b += 4;
369
370         }
371
          /* Single loop: same per-buffer work as the quad loop, for the
             remaining buffers or the remaining slots in the next frame. */
372       while (from + 1 <= from_end && n_left_to_tx >= 1)
373         {
374           u32 bi0;
375           u32 tx_swif0;
376
377           bi0 = from[0];
378           to_tx[0] = bi0;
379           from += 1;
380           to_tx += 1;
381           n_left_to_tx -= 1;
382
383           /* Be grumpy about zero length buffers for benefit of
384              driver tx function. */
385           ASSERT (b[0]->current_length > 0);
386
387           n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
388           tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
389           n_bytes += n_bytes_b0;
390           n_packets += 1;
391
392           if (PREDICT_FALSE (current_config_index != ~0))
393             {
394               vnet_buffer (b[0])->feature_arc_index = arc;
395               b[0]->current_config_index = current_config_index;
396             }
397
398           if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
399             {
400
401               vlib_increment_combined_counter (im->combined_sw_if_counters +
402                                                VNET_INTERFACE_COUNTER_TX,
403                                                thread_index, tx_swif0, 1,
404                                                n_bytes_b0);
405             }
406
407           if (do_tx_offloads)
408             {
409               if (b[0]->flags &
410                   (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM |
411                    VNET_BUFFER_F_OFFLOAD_UDP_CKSUM |
412                    VNET_BUFFER_F_OFFLOAD_IP_CKSUM))
413                 vnet_calc_checksums_inline
414                   (vm, b[0],
415                    b[0]->flags & VNET_BUFFER_F_IS_IP4,
416                    b[0]->flags & VNET_BUFFER_F_IS_IP6);
417             }
418           b += 1;
419         }
420
421       vlib_put_next_frame (vm, node, next_index, n_left_to_tx);
422     }
423
424   /* Update main interface stats. */
425   vlib_increment_combined_counter (im->combined_sw_if_counters
426                                    + VNET_INTERFACE_COUNTER_TX,
427                                    thread_index,
428                                    rt->sw_if_index, n_packets, n_bytes);
429   return n_buffers;
430 }
431
432 static_always_inline void vnet_interface_pcap_tx_trace
433   (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
434    int sw_if_index_from_buffer)
435 {
436   u32 n_left_from, *from;
437   u32 sw_if_index;
438   vnet_pcap_t *pp = &vlib_global_main.pcap;
439
440   if (PREDICT_TRUE (pp->pcap_tx_enable == 0))
441     return;
442
443   if (sw_if_index_from_buffer == 0)
444     {
445       vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
446       sw_if_index = rt->sw_if_index;
447     }
448   else
449     sw_if_index = ~0;
450
451   n_left_from = frame->n_vectors;
452   from = vlib_frame_vector_args (frame);
453
454   while (n_left_from > 0)
455     {
456       int classify_filter_result;
457       u32 bi0 = from[0];
458       vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
459       from++;
460       n_left_from--;
461
462       if (pp->filter_classify_table_index != ~0)
463         {
464           classify_filter_result =
465             vnet_is_packet_traced_inline
466             (b0, pp->filter_classify_table_index, 0 /* full classify */ );
467           if (classify_filter_result)
468             pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
469           continue;
470         }
471
472       if (sw_if_index_from_buffer)
473         sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
474
475       if (pp->pcap_sw_if_index == 0 || pp->pcap_sw_if_index == sw_if_index)
476         {
477           vnet_main_t *vnm = vnet_get_main ();
478           vnet_hw_interface_t *hi =
479             vnet_get_sup_hw_interface (vnm, sw_if_index);
480           /* Capture pkt if not filtered, or if filter hits */
481           if (hi->trace_classify_table_index == ~0 ||
482               vnet_is_packet_traced_inline
483               (b0, hi->trace_classify_table_index, 0 /* full classify */ ))
484             pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
485         }
486     }
487 }
488
489 static vlib_node_function_t CLIB_MULTIARCH_FN (vnet_interface_output_node);
490
491 static uword
492 CLIB_MULTIARCH_FN (vnet_interface_output_node) (vlib_main_t * vm,
493                                                 vlib_node_runtime_t * node,
494                                                 vlib_frame_t * frame)
495 {
496   vnet_main_t *vnm = vnet_get_main ();
497   vnet_hw_interface_t *hi;
498   vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
499   hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
500
501   vnet_interface_pcap_tx_trace (vm, node, frame,
502                                 0 /* sw_if_index_from_buffer */ );
503
504   if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD)
505     return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
506                                               /* do_tx_offloads */ 0);
507   else
508     return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
509                                               /* do_tx_offloads */ 1);
510 }
511
512 CLIB_MARCH_FN_REGISTRATION (vnet_interface_output_node);
513
514 #ifndef CLIB_MARCH_VARIANT
515 vlib_node_function_t *
516 vnet_interface_output_node_get (void)
517 {
518   return CLIB_MARCH_FN_POINTER (vnet_interface_output_node);
519 }
520 #endif /* CLIB_MARCH_VARIANT */
521
522 /* Use buffer's sw_if_index[VLIB_TX] to choose output interface.
 *
 * For every buffer in the frame, looks up the supporting hw interface of
 * the buffer's TX sw_if_index and enqueues the buffer to that interface's
 * output_node_next_index.  The frame is first offered to the pcap TX
 * capture with per-buffer interface selection.
 *
 * Returns frame->n_vectors.
 */
523 VLIB_NODE_FN (vnet_per_buffer_interface_output_node) (vlib_main_t * vm,
524                                                       vlib_node_runtime_t *
525                                                       node,
526                                                       vlib_frame_t * frame)
527 {
528   vnet_main_t *vnm = vnet_get_main ();
529   u32 n_left_to_next, *from, *to_next;
530   u32 n_left_from, next_index;
531
532   vnet_interface_pcap_tx_trace (vm, node, frame,
533                                 1 /* sw_if_index_from_buffer */ );
534
535   n_left_from = frame->n_vectors;
536
537   from = vlib_frame_vector_args (frame);
538   next_index = node->cached_next_index;
539
540   while (n_left_from > 0)
541     {
542       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
543
          /* Dual loop: needs 4 buffers left so from[2] and from[3] are
             valid prefetch targets. */
544       while (n_left_from >= 4 && n_left_to_next >= 2)
545         {
546           u32 bi0, bi1, next0, next1;
547           vlib_buffer_t *b0, *b1;
548           vnet_hw_interface_t *hi0, *hi1;
549
550           /* Prefetch next iteration. */
551           vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
552           vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
553
              /* Buffers are written to the current next frame before their
                 real next index is known; the validate call below receives
                 next0/next1 together with the frame state. */
554           bi0 = from[0];
555           bi1 = from[1];
556           to_next[0] = bi0;
557           to_next[1] = bi1;
558           from += 2;
559           to_next += 2;
560           n_left_to_next -= 2;
561           n_left_from -= 2;
562
563           b0 = vlib_get_buffer (vm, bi0);
564           b1 = vlib_get_buffer (vm, bi1);
565
566           hi0 =
567             vnet_get_sup_hw_interface (vnm,
568                                        vnet_buffer (b0)->sw_if_index
569                                        [VLIB_TX]);
570           hi1 =
571             vnet_get_sup_hw_interface (vnm,
572                                        vnet_buffer (b1)->sw_if_index
573                                        [VLIB_TX]);
574
575           next0 = hi0->output_node_next_index;
576           next1 = hi1->output_node_next_index;
577
578           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
579                                            n_left_to_next, bi0, bi1, next0,
580                                            next1);
581         }
582
          /* Single loop for the remaining buffers. */
583       while (n_left_from > 0 && n_left_to_next > 0)
584         {
585           u32 bi0, next0;
586           vlib_buffer_t *b0;
587           vnet_hw_interface_t *hi0;
588
589           bi0 = from[0];
590           to_next[0] = bi0;
591           from += 1;
592           to_next += 1;
593           n_left_to_next -= 1;
594           n_left_from -= 1;
595
596           b0 = vlib_get_buffer (vm, bi0);
597
598           hi0 =
599             vnet_get_sup_hw_interface (vnm,
600                                        vnet_buffer (b0)->sw_if_index
601                                        [VLIB_TX]);
602
603           next0 = hi0->output_node_next_index;
604
605           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
606                                            n_left_to_next, bi0, next0);
607         }
608
609       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
610     }
611
612   return frame->n_vectors;
613 }
614
/*
 * Trace record used by the drop/punt path.
 *
 * interface_trace_buffers() allocates only the bytes in front of `pad`
 * (sw_if_index, details_valid, is_ip6) and sets details_valid to 0;
 * drop_catchup_trace() allocates the full structure and fills the
 * remaining fields.
 */
615 typedef struct vnet_error_trace_t_
616 {
617   u32 sw_if_index;		/* RX sw_if_index; printed when details_valid == 0 */
618   i8 details_valid;		/* 0: sw_if_index only, 1: mactype only, 2: src/dst addresses */
619   u8 is_ip6;			/* when details_valid == 2: 0 selects the ip4 addresses, otherwise ip6 */
620   u8 pad[2];			/* marks the end of the short record */
621   u16 mactype;			/* ethernet type in host byte order */
622   ip46_address_t src, dst;	/* packet source / destination address */
623 } vnet_error_trace_t;
624
625 static u8 *
626 format_vnet_error_trace (u8 * s, va_list * va)
627 {
628   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
629   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
630   vnet_error_trace_t *t = va_arg (*va, vnet_error_trace_t *);
631
632   /* Normal, non-catchup trace */
633   if (t->details_valid == 0)
634     {
635       s = format (s, "rx:%U", format_vnet_sw_if_index_name,
636                   vnet_get_main (), t->sw_if_index);
637     }
638   else if (t->details_valid == 1)
639     {
640       /* The trace capture code didn't understant the mactype */
641       s = format (s, "mactype 0x%4x (not decoded)", t->mactype);
642     }
643   else if (t->details_valid == 2)
644     {
645       /* Dump the src/dst addresses */
646       if (t->is_ip6 == 0)
647         s = format (s, "IP4: %U -> %U",
648                     format_ip4_address, &t->src.ip4,
649                     format_ip4_address, &t->dst.ip4);
650       else
651         s = format (s, "IP6: %U -> %U",
652                     format_ip6_address, &t->src.ip6,
653                     format_ip6_address, &t->dst.ip6);
654     }
655   return s;
656 }
657
658 static void
659 interface_trace_buffers (vlib_main_t * vm,
660                          vlib_node_runtime_t * node, vlib_frame_t * frame)
661 {
662   u32 n_left, *buffers;
663
664   buffers = vlib_frame_vector_args (frame);
665   n_left = frame->n_vectors;
666
667   while (n_left >= 4)
668     {
669       u32 bi0, bi1;
670       vlib_buffer_t *b0, *b1;
671       vnet_error_trace_t *t0, *t1;
672
673       /* Prefetch next iteration. */
674       vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
675       vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);
676
677       bi0 = buffers[0];
678       bi1 = buffers[1];
679
680       b0 = vlib_get_buffer (vm, bi0);
681       b1 = vlib_get_buffer (vm, bi1);
682
683       if (b0->flags & VLIB_BUFFER_IS_TRACED)
684         {
685           t0 = vlib_add_trace (vm, node, b0,
686                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
687           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
688           t0->details_valid = 0;
689         }
690       if (b1->flags & VLIB_BUFFER_IS_TRACED)
691         {
692           t1 = vlib_add_trace (vm, node, b1,
693                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
694           t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
695           t1->details_valid = 0;
696         }
697       buffers += 2;
698       n_left -= 2;
699     }
700
701   while (n_left >= 1)
702     {
703       u32 bi0;
704       vlib_buffer_t *b0;
705       vnet_error_trace_t *t0;
706
707       bi0 = buffers[0];
708
709       b0 = vlib_get_buffer (vm, bi0);
710
711       if (b0->flags & VLIB_BUFFER_IS_TRACED)
712         {
713           t0 = vlib_add_trace (vm, node, b0,
714                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
715           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
716           t0->details_valid = 0;
717         }
718       buffers += 1;
719       n_left -= 1;
720     }
721 }
722
/* Selects which per-interface counter interface_drop_punt() increments. */
723 typedef enum
724 {
725   VNET_ERROR_DISPOSITION_DROP,	/* count against VNET_INTERFACE_COUNTER_DROP */
726   VNET_ERROR_DISPOSITION_PUNT,	/* count against VNET_INTERFACE_COUNTER_PUNT */
727   VNET_ERROR_N_DISPOSITION,	/* number of dispositions */
728 } vnet_error_disposition_t;
729
730 static void
731 drop_catchup_trace (vlib_main_t * vm,
732                     vlib_node_runtime_t * node, vlib_buffer_t * b)
733 {
734   /* Can we safely rewind the buffer? If not, fagedaboudit */
735   if (b->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
736     {
737       vnet_error_trace_t *t;
738       ip4_header_t *ip4;
739       ip6_header_t *ip6;
740       ethernet_header_t *eh;
741       i16 delta;
742
743       t = vlib_add_trace (vm, node, b, sizeof (*t));
744       delta = vnet_buffer (b)->l2_hdr_offset - b->current_data;
745       vlib_buffer_advance (b, delta);
746
747       eh = vlib_buffer_get_current (b);
748       /* Save mactype */
749       t->mactype = clib_net_to_host_u16 (eh->type);
750       t->details_valid = 1;
751       switch (t->mactype)
752         {
753         case ETHERNET_TYPE_IP4:
754           ip4 = (void *) (eh + 1);
755           t->details_valid = 2;
756           t->is_ip6 = 0;
757           t->src.ip4.as_u32 = ip4->src_address.as_u32;
758           t->dst.ip4.as_u32 = ip4->dst_address.as_u32;
759           break;
760
761         case ETHERNET_TYPE_IP6:
762           ip6 = (void *) (eh + 1);
763           t->details_valid = 2;
764           t->is_ip6 = 1;
765           clib_memcpy_fast (t->src.as_u8, ip6->src_address.as_u8,
766                             sizeof (ip6_address_t));
767           clib_memcpy_fast (t->dst.as_u8, ip6->dst_address.as_u8,
768                             sizeof (ip6_address_t));
769           break;
770
771         default:
772           /* Dunno, do nothing, leave details_valid alone */
773           break;
774         }
775       /* Restore current data (probably unnecessary) */
776       vlib_buffer_advance (b, -delta);
777     }
778 }
779
/*
 * Common body of the drop and punt nodes.
 *
 * Optionally traces the frame, then counts every buffer against the DROP or
 * PUNT simple counter (selected by `disposition`) of its RX sw_if_index,
 * and also against the super-interface when the RX interface reports a
 * different sup_sw_if_index.  Finally all buffers are enqueued to next
 * index 0.
 *
 * Returns frame->n_vectors.
 */
780 static_always_inline uword
781 interface_drop_punt (vlib_main_t * vm,
782                      vlib_node_runtime_t * node,
783                      vlib_frame_t * frame,
784                      vnet_error_disposition_t disposition)
785 {
786   u32 *from, n_left, thread_index, *sw_if_index;
787   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
788   u32 sw_if_indices[VLIB_FRAME_SIZE];
789   vlib_simple_counter_main_t *cm;
790   u16 nexts[VLIB_FRAME_SIZE];
791   u32 n_trace;
792   vnet_main_t *vnm;
793
794   vnm = vnet_get_main ();
795   thread_index = vm->thread_index;
796   from = vlib_frame_vector_args (frame);
797   n_left = frame->n_vectors;
798   b = bufs;
799   sw_if_index = sw_if_indices;
800
801   vlib_get_buffers (vm, from, bufs, n_left);
802
803   /* "trace add error-drop NNN?" */
804   if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
805     {
806       /* If pkts aren't otherwise traced... */
807       if ((node->flags & VLIB_NODE_FLAG_TRACE) == 0)
808         {
809           /* Trace them from here */
810           node->flags |= VLIB_NODE_FLAG_TRACE;
811           while (n_trace && n_left)
812             {
813               if (PREDICT_TRUE
814                   (vlib_trace_buffer (vm, node, 0 /* next_index */ , b[0],
815                                       0 /* follow chain */ )))
816                 {
817                   /*
818                    * Here we have a wireshark dissector problem.
819                    * Packets may be well-formed, or not. We
820                    * must not blow chunks in any case.
821                    *
822                    * Try to produce trace records which will help
823                    * folks understand what's going on.
824                    */
825                   drop_catchup_trace (vm, node, b[0]);
826                   n_trace--;
827                 }
828               n_left--;
829               b++;
830             }
831         }
832
          /* Store the remaining trace budget and rewind the cursors that
             the catch-up loop above may have advanced. */
833       vlib_set_trace_count (vm, node, n_trace);
834       b = bufs;
835       n_left = frame->n_vectors;
836     }
837
838   if (node->flags & VLIB_NODE_FLAG_TRACE)
839     interface_trace_buffers (vm, node, frame);
840
841   /* All going to drop regardless, this is just a counting exercise */
      /* Zeroed nexts[]: every buffer is enqueued to next index 0 below. */
842   clib_memset (nexts, 0, sizeof (nexts));
843
844   cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
845                          (disposition == VNET_ERROR_DISPOSITION_PUNT
846                           ? VNET_INTERFACE_COUNTER_PUNT
847                           : VNET_INTERFACE_COUNTER_DROP));
848
849   /* collect the array of interfaces first ... */
850   while (n_left >= 4)
851     {
852       if (n_left >= 12)
853         {
854           /* Prefetch 8 ahead - there's not much going on in each iteration */
855           vlib_prefetch_buffer_header (b[4], LOAD);
856           vlib_prefetch_buffer_header (b[5], LOAD);
857           vlib_prefetch_buffer_header (b[6], LOAD);
858           vlib_prefetch_buffer_header (b[7], LOAD);
859         }
860       sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
861       sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
862       sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
863       sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
864
865       sw_if_index += 4;
866       n_left -= 4;
867       b += 4;
868     }
869   while (n_left)
870     {
871       sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
872
873       sw_if_index += 1;
874       n_left -= 1;
875       b += 1;
876     }
877
878   /* ... then count against them in blocks */
879   n_left = frame->n_vectors;
880
881   while (n_left)
882     {
883       vnet_sw_interface_t *sw_if0;
884       u16 off, count;
885
886       off = frame->n_vectors - n_left;
887
888       sw_if_index = sw_if_indices + off;
889
          /* NOTE(review): presumably the length of the leading run of
             entries equal to sw_if_index[0] - confirm against
             clib_count_equal_u32(). */
890       count = clib_count_equal_u32 (sw_if_index, n_left);
891       n_left -= count;
892
893       vlib_increment_simple_counter (cm, thread_index, sw_if_index[0], count);
894
895       /* Increment super-interface drop/punt counters for
896          sub-interfaces. */
897       sw_if0 = vnet_get_sw_interface (vnm, sw_if_index[0]);
898       if (sw_if0->sup_sw_if_index != sw_if_index[0])
899         vlib_increment_simple_counter
900           (cm, thread_index, sw_if0->sup_sw_if_index, count);
901     }
902
903   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
904
905   return frame->n_vectors;
906 }
907
908 static inline void
909 pcap_drop_trace (vlib_main_t * vm,
910                  vnet_interface_main_t * im,
911                  vnet_pcap_t * pp, vlib_frame_t * f)
912 {
913   u32 *from;
914   u32 n_left = f->n_vectors;
915   vlib_buffer_t *b0, *p1;
916   u32 bi0;
917   i16 save_current_data;
918   u16 save_current_length;
919   vlib_error_main_t *em = &vm->error_main;
920   int do_trace = 0;
921
922
923   from = vlib_frame_vector_args (f);
924
925   while (n_left > 0)
926     {
927       if (PREDICT_TRUE (n_left > 1))
928         {
929           p1 = vlib_get_buffer (vm, from[1]);
930           vlib_prefetch_buffer_header (p1, LOAD);
931         }
932
933       bi0 = from[0];
934       b0 = vlib_get_buffer (vm, bi0);
935       from++;
936       n_left--;
937
938       /* See if we're pointedly ignoring this specific error */
939       if (im->pcap_drop_filter_hash
940           && hash_get (im->pcap_drop_filter_hash, b0->error))
941         continue;
942
943       do_trace = (pp->pcap_sw_if_index == 0) ||
944         pp->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_RX];
945
946       if (PREDICT_FALSE
947           (do_trace == 0 && pp->filter_classify_table_index != ~0))
948         {
949           do_trace = vnet_is_packet_traced_inline
950             (b0, pp->filter_classify_table_index, 0 /* full classify */ );
951         }
952
953       /* Trace all drops, or drops received on a specific interface */
954       if (do_trace)
955         {
956           save_current_data = b0->current_data;
957           save_current_length = b0->current_length;
958
959           /*
960            * Typically, we'll need to rewind the buffer
961            * if l2_hdr_offset is valid, make sure to rewind to the start of
962            * the L2 header. This may not be the buffer start in case we pop-ed
963            * vlan tags.
964            * Otherwise, rewind to buffer start and hope for the best.
965            */
966           if (b0->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
967             {
968               if (b0->current_data > vnet_buffer (b0)->l2_hdr_offset)
969                 vlib_buffer_advance (b0,
970                                      vnet_buffer (b0)->l2_hdr_offset -
971                                      b0->current_data);
972             }
973           else if (b0->current_data > 0)
974             vlib_buffer_advance (b0, (word) - b0->current_data);
975
976           {
977             vlib_buffer_t *last = b0;
978             u32 error_node_index;
979             int drop_string_len;
980             vlib_node_t *n;
981             /* Length of the error string */
982             int error_string_len =
983               clib_strnlen (em->counters_heap[b0->error].name, 128);
984
985             /* Dig up the drop node */
986             error_node_index = vm->node_main.node_by_error[b0->error];
987             n = vlib_get_node (vm, error_node_index);
988
989             /* Length of full drop string, w/ "nodename: " prepended */
990             drop_string_len = error_string_len + vec_len (n->name) + 2;
991
992             /* Find the last buffer in the chain */
993             while (last->flags & VLIB_BUFFER_NEXT_PRESENT)
994               last = vlib_get_buffer (vm, last->next_buffer);
995
996             /*
997              * Append <nodename>: <error-string> to the capture,
998              * only if we can do that without allocating a new buffer.
999              */
1000             if (PREDICT_TRUE ((last->current_data + last->current_length)
1001                               < (VLIB_BUFFER_DEFAULT_DATA_SIZE
1002                                  - drop_string_len)))
1003               {
1004                 clib_memcpy_fast (last->data + last->current_data +
1005                                   last->current_length, n->name,
1006                                   vec_len (n->name));
1007                 clib_memcpy_fast (last->data + last->current_data +
1008                                   last->current_length + vec_len (n->name),
1009                                   ": ", 2);
1010                 clib_memcpy_fast (last->data + last->current_data +
1011                                   last->current_length + vec_len (n->name) +
1012                                   2, em->counters_heap[b0->error].name,
1013                                   error_string_len);
1014                 last->current_length += drop_string_len;
1015                 b0->flags &= ~(VLIB_BUFFER_TOTAL_LENGTH_VALID);
1016                 pcap_add_buffer (&pp->pcap_main, vm, bi0,
1017                                  pp->max_bytes_per_pkt);
1018                 last->current_length -= drop_string_len;
1019                 b0->current_data = save_current_data;
1020                 b0->current_length = save_current_length;
1021                 continue;
1022               }
1023           }
1024
1025           /*
1026            * Didn't have space in the last buffer, here's the dropped
1027            * packet as-is
1028            */
1029           pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
1030
1031           b0->current_data = save_current_data;
1032           b0->current_length = save_current_length;
1033         }
1034     }
1035 }
1036
1037 #ifndef CLIB_MARCH_VARIANT
1038 void
1039 vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add)
1040 {
1041   vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
1042
1043   if (im->pcap_drop_filter_hash == 0)
1044     im->pcap_drop_filter_hash = hash_create (0, sizeof (uword));
1045
1046   if (is_add)
1047     hash_set (im->pcap_drop_filter_hash, error_index, 1);
1048   else
1049     hash_unset (im->pcap_drop_filter_hash, error_index);
1050 }
1051 #endif /* CLIB_MARCH_VARIANT */
1052
1053 VLIB_NODE_FN (interface_drop) (vlib_main_t * vm,
1054                                vlib_node_runtime_t * node,
1055                                vlib_frame_t * frame)
1056 {
1057   vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
1058   vnet_pcap_t *pp = &vlib_global_main.pcap;
1059
1060   if (PREDICT_FALSE (pp->pcap_drop_enable))
1061     pcap_drop_trace (vm, im, pp, frame);
1062
1063   return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_DROP);
1064 }
1065
1066 VLIB_NODE_FN (interface_punt) (vlib_main_t * vm,
1067                                vlib_node_runtime_t * node,
1068                                vlib_frame_t * frame)
1069 {
1070   return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
1071 }
1072
1073 /* *INDENT-OFF* */
1074 VLIB_REGISTER_NODE (interface_drop) = {
1075   .name = "error-drop",
1076   .vector_size = sizeof (u32),
1077   .format_trace = format_vnet_error_trace,
1078   .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
1079   .n_next_nodes = 1,
1080   .next_nodes = {
1081     [0] = "drop",
1082   },
1083 };
1084 /* *INDENT-ON* */
1085
1086 /* *INDENT-OFF* */
1087 VLIB_REGISTER_NODE (interface_punt) = {
1088   .name = "error-punt",
1089   .vector_size = sizeof (u32),
1090   .format_trace = format_vnet_error_trace,
1091   .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
1092   .n_next_nodes = 1,
1093   .next_nodes = {
1094     [0] = "punt",
1095   },
1096 };
1097 /* *INDENT-ON* */
1098
1099 /* *INDENT-OFF* */
1100 VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = {
1101   .name = "interface-output",
1102   .vector_size = sizeof (u32),
1103 };
1104 /* *INDENT-ON* */
1105
1106 static uword
1107 interface_tx_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1108                       vlib_frame_t * from_frame)
1109 {
1110   vnet_main_t *vnm = vnet_get_main ();
1111   u32 last_sw_if_index = ~0;
1112   vlib_frame_t *to_frame = 0;
1113   vnet_hw_interface_t *hw = 0;
1114   u32 *from, *to_next = 0;
1115   u32 n_left_from;
1116
1117   from = vlib_frame_vector_args (from_frame);
1118   n_left_from = from_frame->n_vectors;
1119   while (n_left_from > 0)
1120     {
1121       u32 bi0;
1122       vlib_buffer_t *b0;
1123       u32 sw_if_index0;
1124
1125       bi0 = from[0];
1126       from++;
1127       n_left_from--;
1128       b0 = vlib_get_buffer (vm, bi0);
1129       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1130
1131       if (PREDICT_FALSE ((last_sw_if_index != sw_if_index0) || to_frame == 0))
1132         {
1133           if (to_frame)
1134             {
1135               hw = vnet_get_sup_hw_interface (vnm, last_sw_if_index);
1136               vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
1137             }
1138           last_sw_if_index = sw_if_index0;
1139           hw = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1140           to_frame = vlib_get_frame_to_node (vm, hw->tx_node_index);
1141           to_next = vlib_frame_vector_args (to_frame);
1142         }
1143
1144       to_next[0] = bi0;
1145       to_next++;
1146       to_frame->n_vectors++;
1147     }
1148   vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
1149   return from_frame->n_vectors;
1150 }
1151
1152 /* *INDENT-OFF* */
1153 VLIB_REGISTER_NODE (interface_tx) = {
1154   .function = interface_tx_node_fn,
1155   .name = "interface-tx",
1156   .vector_size = sizeof (u32),
1157   .n_next_nodes = 1,
1158   .next_nodes = {
1159     [0] = "error-drop",
1160   },
1161 };
1162
1163 VNET_FEATURE_ARC_INIT (interface_output, static) =
1164 {
1165   .arc_name  = "interface-output",
1166   .start_nodes = VNET_FEATURES (0),
1167   .last_in_arc = "interface-tx",
1168   .arc_index_ptr = &vnet_main.interface_main.output_feature_arc_index,
1169 };
1170
1171 VNET_FEATURE_INIT (span_tx, static) = {
1172   .arc_name = "interface-output",
1173   .node_name = "span-output",
1174   .runs_before = VNET_FEATURES ("interface-tx"),
1175 };
1176
1177 VNET_FEATURE_INIT (ipsec_if_tx, static) = {
1178   .arc_name = "interface-output",
1179   .node_name = "ipsec-if-output",
1180   .runs_before = VNET_FEATURES ("interface-tx"),
1181 };
1182
1183 VNET_FEATURE_INIT (interface_tx, static) = {
1184   .arc_name = "interface-output",
1185   .node_name = "interface-tx",
1186   .runs_before = 0,
1187 };
1188 /* *INDENT-ON* */
1189
1190 #ifndef CLIB_MARCH_VARIANT
1191 clib_error_t *
1192 vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm,
1193                                                        u32 hw_if_index,
1194                                                        u32 is_create)
1195 {
1196   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
1197   u32 next_index;
1198
1199   if (hi->output_node_index == 0)
1200     return 0;
1201
1202   next_index = vlib_node_add_next
1203     (vnm->vlib_main, vnet_per_buffer_interface_output_node.index,
1204      hi->output_node_index);
1205   hi->output_node_next_index = next_index;
1206
1207   return 0;
1208 }
1209
1210 VNET_HW_INTERFACE_ADD_DEL_FUNCTION
1211   (vnet_per_buffer_interface_output_hw_interface_add_del);
1212
1213 void
1214 vnet_set_interface_output_node (vnet_main_t * vnm,
1215                                 u32 hw_if_index, u32 node_index)
1216 {
1217   ASSERT (node_index);
1218   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
1219   u32 next_index = vlib_node_add_next
1220     (vnm->vlib_main, vnet_per_buffer_interface_output_node.index, node_index);
1221   hi->output_node_next_index = next_index;
1222   hi->output_node_index = node_index;
1223 }
1224 #endif /* CLIB_MARCH_VARIANT */
1225
1226 /*
1227  * fd.io coding-style-patch-verification: ON
1228  *
1229  * Local Variables:
1230  * eval: (c-set-style "gnu")
1231  * End:
1232  */