vlib: refactor checksum offload support
[vpp.git] / src / vnet / interface_output.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * interface_output.c: interface output node
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/icmp46_packet.h>
42 #include <vnet/ethernet/packet.h>
43 #include <vnet/ip/format.h>
44 #include <vnet/ip/ip4.h>
45 #include <vnet/ip/ip6.h>
46 #include <vnet/udp/udp_packet.h>
47 #include <vnet/feature/feature.h>
48 #include <vnet/classify/trace_classify.h>
49 #include <vnet/interface_output.h>
50
/* Per-packet trace record for the interface output node: the TX
 * sw_if_index, the buffer flags at trace time, and the first bytes of
 * packet data, padded so the whole record is 128 bytes. */
typedef struct
{
  u32 sw_if_index;		/* vnet_buffer(b)->sw_if_index[VLIB_TX] */
  u32 flags;			/* b->flags at trace time */
  u8 data[128 - 2 * sizeof (u32)];	/* leading bytes of packet data */
}
interface_output_trace_t;
58
59 #ifndef CLIB_MARCH_VARIANT
60 u8 *
61 format_vnet_interface_output_trace (u8 * s, va_list * va)
62 {
63   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
64   vlib_node_t *node = va_arg (*va, vlib_node_t *);
65   interface_output_trace_t *t = va_arg (*va, interface_output_trace_t *);
66   vnet_main_t *vnm = vnet_get_main ();
67   vnet_sw_interface_t *si;
68   u32 indent;
69
70   if (t->sw_if_index != (u32) ~ 0)
71     {
72       indent = format_get_indent (s);
73
74       if (pool_is_free_index
75           (vnm->interface_main.sw_interfaces, t->sw_if_index))
76         {
77           /* the interface may have been deleted by the time the trace is printed */
78           s = format (s, "sw_if_index: %d ", t->sw_if_index);
79         }
80       else
81         {
82           si = vnet_get_sw_interface (vnm, t->sw_if_index);
83           s =
84             format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
85                     t->flags);
86         }
87       s =
88         format (s, "\n%U%U", format_white_space, indent,
89                 node->format_buffer ? node->format_buffer : format_hex_bytes,
90                 t->data, sizeof (t->data));
91     }
92   return s;
93 }
94 #endif /* CLIB_MARCH_VARIANT */
95
96 static void
97 vnet_interface_output_trace (vlib_main_t * vm,
98                              vlib_node_runtime_t * node,
99                              vlib_frame_t * frame, uword n_buffers)
100 {
101   u32 n_left, *from;
102
103   n_left = n_buffers;
104   from = vlib_frame_vector_args (frame);
105
106   while (n_left >= 4)
107     {
108       u32 bi0, bi1;
109       vlib_buffer_t *b0, *b1;
110       interface_output_trace_t *t0, *t1;
111
112       /* Prefetch next iteration. */
113       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
114       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
115
116       bi0 = from[0];
117       bi1 = from[1];
118
119       b0 = vlib_get_buffer (vm, bi0);
120       b1 = vlib_get_buffer (vm, bi1);
121
122       if (b0->flags & VLIB_BUFFER_IS_TRACED)
123         {
124           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
125           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
126           t0->flags = b0->flags;
127           clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
128                             sizeof (t0->data));
129         }
130       if (b1->flags & VLIB_BUFFER_IS_TRACED)
131         {
132           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
133           t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
134           t1->flags = b1->flags;
135           clib_memcpy_fast (t1->data, vlib_buffer_get_current (b1),
136                             sizeof (t1->data));
137         }
138       from += 2;
139       n_left -= 2;
140     }
141
142   while (n_left >= 1)
143     {
144       u32 bi0;
145       vlib_buffer_t *b0;
146       interface_output_trace_t *t0;
147
148       bi0 = from[0];
149
150       b0 = vlib_get_buffer (vm, bi0);
151
152       if (b0->flags & VLIB_BUFFER_IS_TRACED)
153         {
154           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
155           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
156           t0->flags = b0->flags;
157           clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0),
158                             sizeof (t0->data));
159         }
160       from += 1;
161       n_left -= 1;
162     }
163 }
164
/*
 * Shared worker for the per-interface output node.
 *
 * Validates interface state (deleted / admin-down / link-down packets
 * are dropped with a TX-error count), resolves the interface-output
 * feature arc, copies buffer indices to the TX next frame while
 * accumulating per-subinterface and main-interface TX counters, and,
 * when do_tx_offloads is set, computes checksums in software for
 * buffers flagged VNET_BUFFER_F_OFFLOAD.
 *
 * do_tx_offloads is passed as a literal constant by the caller, so
 * with static_always_inline each specialization compiles without the
 * offload branch.  Returns the number of buffers consumed.
 */
static_always_inline uword
vnet_interface_output_node_inline (vlib_main_t * vm,
				   vlib_node_runtime_t * node,
				   vlib_frame_t * frame,
				   vnet_main_t * vnm,
				   vnet_hw_interface_t * hi,
				   int do_tx_offloads)
{
  vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
  vnet_sw_interface_t *si;
  u32 n_left_to_tx, *from, *from_end, *to_tx;
  u32 n_bytes, n_buffers, n_packets;
  u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3;
  u32 thread_index = vm->thread_index;
  vnet_interface_main_t *im = &vnm->interface_main;
  u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
  /* ~0 means "no interface-output features configured" */
  u32 current_config_index = ~0;
  u8 arc = im->output_feature_arc_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;

  n_buffers = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vnet_interface_output_trace (vm, node, frame, n_buffers);

  from = vlib_frame_vector_args (frame);
  vlib_get_buffers (vm, from, b, n_buffers);

  /* Interface removed while packets were in flight: drop everything */
  if (rt->is_deleted)
    return vlib_error_drop_buffers (vm, node, from,
				    /* buffer stride */ 1,
				    n_buffers,
				    VNET_INTERFACE_OUTPUT_NEXT_DROP,
				    node->node_index,
				    VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED);

  si = vnet_get_sw_interface (vnm, rt->sw_if_index);
  /* note: deliberately overwrites the hi argument with the current
     sup hw interface for rt->sw_if_index */
  hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ||
      !(hi->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
    {
      vlib_simple_counter_main_t *cm;

      /* Count the whole frame as TX errors, then drop it */
      cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
			     VNET_INTERFACE_COUNTER_TX_ERROR);
      vlib_increment_simple_counter (cm, thread_index,
				     rt->sw_if_index, n_buffers);

      return vlib_error_drop_buffers (vm, node, from,
				      /* buffer stride */ 1,
				      n_buffers,
				      VNET_INTERFACE_OUTPUT_NEXT_DROP,
				      node->node_index,
				      VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN);
    }

  from_end = from + n_buffers;

  /* Total byte count of all buffers. */
  n_bytes = 0;
  n_packets = 0;

  /* interface-output feature arc handling */
  if (PREDICT_FALSE (vnet_have_features (arc, rt->sw_if_index)))
    {
      vnet_feature_config_main_t *fcm;
      fcm = vnet_feature_get_config_main (arc);
      current_config_index = vnet_get_feature_config_index (arc,
							    rt->sw_if_index);
      /* redirect next_index to the first feature node on the arc */
      vnet_get_config_data (&fcm->config_main, &current_config_index,
			    &next_index, 0);
    }

  while (from < from_end)
    {
      /* Get new next frame since previous incomplete frame may have less
         than VNET_FRAME_SIZE vectors in it. */
      vlib_get_new_next_frame (vm, node, next_index, to_tx, n_left_to_tx);

      /* Quad loop: 4 buffers per iteration, prefetching 4 ahead
         (hence the "from + 8" bound). */
      while (from + 8 <= from_end && n_left_to_tx >= 4)
	{
	  u32 bi0, bi1, bi2, bi3;
	  u32 tx_swif0, tx_swif1, tx_swif2, tx_swif3;
	  u32 or_flags;

	  /* Prefetch next iteration. */
	  vlib_prefetch_buffer_header (b[4], LOAD);
	  vlib_prefetch_buffer_header (b[5], LOAD);
	  vlib_prefetch_buffer_header (b[6], LOAD);
	  vlib_prefetch_buffer_header (b[7], LOAD);

	  bi0 = from[0];
	  bi1 = from[1];
	  bi2 = from[2];
	  bi3 = from[3];
	  to_tx[0] = bi0;
	  to_tx[1] = bi1;
	  to_tx[2] = bi2;
	  to_tx[3] = bi3;

	  /* OR of all 4 flag words: lets us skip the per-buffer offload
	     checks when none of the 4 buffers requests offload */
	  or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;

	  from += 4;
	  to_tx += 4;
	  n_left_to_tx -= 4;

	  /* Be grumpy about zero length buffers for benefit of
	     driver tx function. */
	  ASSERT (b[0]->current_length > 0);
	  ASSERT (b[1]->current_length > 0);
	  ASSERT (b[2]->current_length > 0);
	  ASSERT (b[3]->current_length > 0);

	  n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
	  n_bytes_b1 = vlib_buffer_length_in_chain (vm, b[1]);
	  n_bytes_b2 = vlib_buffer_length_in_chain (vm, b[2]);
	  n_bytes_b3 = vlib_buffer_length_in_chain (vm, b[3]);
	  tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
	  tx_swif1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
	  tx_swif2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
	  tx_swif3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];

	  n_bytes += n_bytes_b0 + n_bytes_b1;
	  n_bytes += n_bytes_b2 + n_bytes_b3;
	  n_packets += 4;

	  /* Hand each buffer to the feature arc, if one is configured */
	  if (PREDICT_FALSE (current_config_index != ~0))
	    {
	      vnet_buffer (b[0])->feature_arc_index = arc;
	      vnet_buffer (b[1])->feature_arc_index = arc;
	      vnet_buffer (b[2])->feature_arc_index = arc;
	      vnet_buffer (b[3])->feature_arc_index = arc;
	      b[0]->current_config_index = current_config_index;
	      b[1]->current_config_index = current_config_index;
	      b[2]->current_config_index = current_config_index;
	      b[3]->current_config_index = current_config_index;
	    }

	  /* update vlan subif tx counts, if required */
	  if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
	    {
	      vlib_increment_combined_counter (im->combined_sw_if_counters +
					       VNET_INTERFACE_COUNTER_TX,
					       thread_index, tx_swif0, 1,
					       n_bytes_b0);
	    }

	  if (PREDICT_FALSE (tx_swif1 != rt->sw_if_index))
	    {

	      vlib_increment_combined_counter (im->combined_sw_if_counters +
					       VNET_INTERFACE_COUNTER_TX,
					       thread_index, tx_swif1, 1,
					       n_bytes_b1);
	    }

	  if (PREDICT_FALSE (tx_swif2 != rt->sw_if_index))
	    {

	      vlib_increment_combined_counter (im->combined_sw_if_counters +
					       VNET_INTERFACE_COUNTER_TX,
					       thread_index, tx_swif2, 1,
					       n_bytes_b2);
	    }
	  if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index))
	    {

	      vlib_increment_combined_counter (im->combined_sw_if_counters +
					       VNET_INTERFACE_COUNTER_TX,
					       thread_index, tx_swif3, 1,
					       n_bytes_b3);
	    }

	  /* Software checksum pass — compiled in only when the caller
	     passed do_tx_offloads = 1 (hw cannot offload) */
	  if (do_tx_offloads)
	    {
	      if (or_flags & VNET_BUFFER_F_OFFLOAD)
		{
		  if (b[0]->flags & VNET_BUFFER_F_OFFLOAD)
		    vnet_calc_checksums_inline
		      (vm, b[0],
		       b[0]->flags & VNET_BUFFER_F_IS_IP4,
		       b[0]->flags & VNET_BUFFER_F_IS_IP6);
		  if (b[1]->flags & VNET_BUFFER_F_OFFLOAD)
		    vnet_calc_checksums_inline
		      (vm, b[1],
		       b[1]->flags & VNET_BUFFER_F_IS_IP4,
		       b[1]->flags & VNET_BUFFER_F_IS_IP6);
		  if (b[2]->flags & VNET_BUFFER_F_OFFLOAD)
		    vnet_calc_checksums_inline
		      (vm, b[2],
		       b[2]->flags & VNET_BUFFER_F_IS_IP4,
		       b[2]->flags & VNET_BUFFER_F_IS_IP6);
		  if (b[3]->flags & VNET_BUFFER_F_OFFLOAD)
		    vnet_calc_checksums_inline
		      (vm, b[3],
		       b[3]->flags & VNET_BUFFER_F_IS_IP4,
		       b[3]->flags & VNET_BUFFER_F_IS_IP6);
		}
	    }
	  b += 4;

	}

      /* Single-buffer cleanup loop */
      while (from + 1 <= from_end && n_left_to_tx >= 1)
	{
	  u32 bi0;
	  u32 tx_swif0;

	  bi0 = from[0];
	  to_tx[0] = bi0;
	  from += 1;
	  to_tx += 1;
	  n_left_to_tx -= 1;

	  /* Be grumpy about zero length buffers for benefit of
	     driver tx function. */
	  ASSERT (b[0]->current_length > 0);

	  n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
	  tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
	  n_bytes += n_bytes_b0;
	  n_packets += 1;

	  if (PREDICT_FALSE (current_config_index != ~0))
	    {
	      vnet_buffer (b[0])->feature_arc_index = arc;
	      b[0]->current_config_index = current_config_index;
	    }

	  /* update vlan subif tx counts, if required */
	  if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index))
	    {

	      vlib_increment_combined_counter (im->combined_sw_if_counters +
					       VNET_INTERFACE_COUNTER_TX,
					       thread_index, tx_swif0, 1,
					       n_bytes_b0);
	    }

	  if (do_tx_offloads)
	    {
	      if (b[0]->flags & VNET_BUFFER_F_OFFLOAD)
		vnet_calc_checksums_inline
		  (vm, b[0],
		   b[0]->flags & VNET_BUFFER_F_IS_IP4,
		   b[0]->flags & VNET_BUFFER_F_IS_IP6);
	    }
	  b += 1;
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_tx);
    }

  /* Update main interface stats. */
  vlib_increment_combined_counter (im->combined_sw_if_counters
				   + VNET_INTERFACE_COUNTER_TX,
				   thread_index,
				   rt->sw_if_index, n_packets, n_bytes);
  return n_buffers;
}
424
/*
 * Conditionally capture TX packets to the global pcap file.
 *
 * When sw_if_index_from_buffer is 0 all packets belong to the node's
 * own interface (rt->sw_if_index); otherwise the TX sw_if_index is
 * read from each buffer.  If a pcap filter classify table is set it
 * alone decides what is captured; otherwise packets are matched
 * against pcap_sw_if_index (0 = any interface) and, when present, the
 * hw interface's own trace classify table.
 */
static_always_inline void vnet_interface_pcap_tx_trace
  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
   int sw_if_index_from_buffer)
{
  u32 n_left_from, *from;
  u32 sw_if_index;
  vnet_pcap_t *pp = &vlib_global_main.pcap;

  /* Fast path: capture disabled */
  if (PREDICT_TRUE (pp->pcap_tx_enable == 0))
    return;

  if (sw_if_index_from_buffer == 0)
    {
      /* Per-interface output node: every packet shares rt->sw_if_index */
      vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
      sw_if_index = rt->sw_if_index;
    }
  else
    sw_if_index = ~0;

  n_left_from = frame->n_vectors;
  from = vlib_frame_vector_args (frame);

  while (n_left_from > 0)
    {
      int classify_filter_result;
      u32 bi0 = from[0];
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
      from++;
      n_left_from--;

      /* A global filter classify table overrides interface matching */
      if (pp->filter_classify_table_index != ~0)
	{
	  classify_filter_result =
	    vnet_is_packet_traced_inline
	    (b0, pp->filter_classify_table_index, 0 /* full classify */ );
	  if (classify_filter_result)
	    pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
	  continue;
	}

      if (sw_if_index_from_buffer)
	sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];

      /* pcap_sw_if_index == 0 means "capture on any interface" */
      if (pp->pcap_sw_if_index == 0 || pp->pcap_sw_if_index == sw_if_index)
	{
	  vnet_main_t *vnm = vnet_get_main ();
	  vnet_hw_interface_t *hi =
	    vnet_get_sup_hw_interface (vnm, sw_if_index);
	  /* Capture pkt if not filtered, or if filter hits */
	  if (hi->trace_classify_table_index == ~0 ||
	      vnet_is_packet_traced_inline
	      (b0, hi->trace_classify_table_index, 0 /* full classify */ ))
	    pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
	}
    }
}
481
482 static vlib_node_function_t CLIB_MULTIARCH_FN (vnet_interface_output_node);
483
484 static uword
485 CLIB_MULTIARCH_FN (vnet_interface_output_node) (vlib_main_t * vm,
486                                                 vlib_node_runtime_t * node,
487                                                 vlib_frame_t * frame)
488 {
489   vnet_main_t *vnm = vnet_get_main ();
490   vnet_hw_interface_t *hi;
491   vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
492   hi = vnet_get_sup_hw_interface (vnm, rt->sw_if_index);
493
494   vnet_interface_pcap_tx_trace (vm, node, frame,
495                                 0 /* sw_if_index_from_buffer */ );
496
497   if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD)
498     return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
499                                               /* do_tx_offloads */ 0);
500   else
501     return vnet_interface_output_node_inline (vm, node, frame, vnm, hi,
502                                               /* do_tx_offloads */ 1);
503 }
504
505 CLIB_MARCH_FN_REGISTRATION (vnet_interface_output_node);
506
507 #ifndef CLIB_MARCH_VARIANT
/* Return the march-variant-selected implementation of the interface
 * output node function, for callers that register it by pointer. */
vlib_node_function_t *
vnet_interface_output_node_get (void)
{
  return CLIB_MARCH_FN_POINTER (vnet_interface_output_node);
}
513 #endif /* CLIB_MARCH_VARIANT */
514
/* Use buffer's sw_if_index[VLIB_TX] to choose output interface.
 *
 * Unlike the per-interface output node, each buffer in the frame may
 * be destined to a different interface: look up the sup hw interface
 * per buffer and enqueue to that interface's registered output next
 * index.  Dual loop with 2-ahead buffer prefetch. */
VLIB_NODE_FN (vnet_per_buffer_interface_output_node) (vlib_main_t * vm,
						      vlib_node_runtime_t *
						      node,
						      vlib_frame_t * frame)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 n_left_to_next, *from, *to_next;
  u32 n_left_from, next_index;

  /* sw_if_index comes from each buffer here, not from node runtime */
  vnet_interface_pcap_tx_trace (vm, node, frame,
				1 /* sw_if_index_from_buffer */ );

  n_left_from = frame->n_vectors;

  from = vlib_frame_vector_args (frame);
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  u32 bi0, bi1, next0, next1;
	  vlib_buffer_t *b0, *b1;
	  vnet_hw_interface_t *hi0, *hi1;

	  /* Prefetch next iteration. */
	  vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
	  vlib_prefetch_buffer_with_index (vm, from[3], LOAD);

	  bi0 = from[0];
	  bi1 = from[1];
	  to_next[0] = bi0;
	  to_next[1] = bi1;
	  from += 2;
	  to_next += 2;
	  n_left_to_next -= 2;
	  n_left_from -= 2;

	  b0 = vlib_get_buffer (vm, bi0);
	  b1 = vlib_get_buffer (vm, bi1);

	  hi0 =
	    vnet_get_sup_hw_interface (vnm,
				       vnet_buffer (b0)->sw_if_index
				       [VLIB_TX]);
	  hi1 =
	    vnet_get_sup_hw_interface (vnm,
				       vnet_buffer (b1)->sw_if_index
				       [VLIB_TX]);

	  /* next index registered when the hw interface was created */
	  next0 = hi0->output_node_next_index;
	  next1 = hi1->output_node_next_index;

	  /* validate_buffer_enqueue fixes up the frame if next0/next1
	     differ from the speculated next_index */
	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, bi1, next0,
					   next1);
	}

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0, next0;
	  vlib_buffer_t *b0;
	  vnet_hw_interface_t *hi0;

	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_to_next -= 1;
	  n_left_from -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  hi0 =
	    vnet_get_sup_hw_interface (vnm,
				       vnet_buffer (b0)->sw_if_index
				       [VLIB_TX]);

	  next0 = hi0->output_node_next_index;

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
607
/* Trace record for the error-drop / error-punt nodes.  Normal traces
 * stop at 'pad' (see STRUCT_OFFSET_OF uses below); catch-up traces
 * produced by drop_catchup_trace() fill in the remaining fields. */
typedef struct vnet_error_trace_t_
{
  u32 sw_if_index;		/* RX sw_if_index of the packet */
  i8 details_valid;		/* 0: if-index only; 1: mactype only; 2: ip addrs valid */
  u8 is_ip6;			/* when details_valid == 2: 0 = IP4, 1 = IP6 */
  u8 pad[2];			/* normal traces are truncated at this offset */
  u16 mactype;			/* host-order ethertype (details_valid >= 1) */
  ip46_address_t src, dst;	/* IP addresses (details_valid == 2) */
} vnet_error_trace_t;
617
618 static u8 *
619 format_vnet_error_trace (u8 * s, va_list * va)
620 {
621   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
622   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
623   vnet_error_trace_t *t = va_arg (*va, vnet_error_trace_t *);
624
625   /* Normal, non-catchup trace */
626   if (t->details_valid == 0)
627     {
628       s = format (s, "rx:%U", format_vnet_sw_if_index_name,
629                   vnet_get_main (), t->sw_if_index);
630     }
631   else if (t->details_valid == 1)
632     {
633       /* The trace capture code didn't understant the mactype */
634       s = format (s, "mactype 0x%4x (not decoded)", t->mactype);
635     }
636   else if (t->details_valid == 2)
637     {
638       /* Dump the src/dst addresses */
639       if (t->is_ip6 == 0)
640         s = format (s, "IP4: %U -> %U",
641                     format_ip4_address, &t->src.ip4,
642                     format_ip4_address, &t->dst.ip4);
643       else
644         s = format (s, "IP6: %U -> %U",
645                     format_ip6_address, &t->src.ip6,
646                     format_ip6_address, &t->dst.ip6);
647     }
648   return s;
649 }
650
651 static void
652 interface_trace_buffers (vlib_main_t * vm,
653                          vlib_node_runtime_t * node, vlib_frame_t * frame)
654 {
655   u32 n_left, *buffers;
656
657   buffers = vlib_frame_vector_args (frame);
658   n_left = frame->n_vectors;
659
660   while (n_left >= 4)
661     {
662       u32 bi0, bi1;
663       vlib_buffer_t *b0, *b1;
664       vnet_error_trace_t *t0, *t1;
665
666       /* Prefetch next iteration. */
667       vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
668       vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);
669
670       bi0 = buffers[0];
671       bi1 = buffers[1];
672
673       b0 = vlib_get_buffer (vm, bi0);
674       b1 = vlib_get_buffer (vm, bi1);
675
676       if (b0->flags & VLIB_BUFFER_IS_TRACED)
677         {
678           t0 = vlib_add_trace (vm, node, b0,
679                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
680           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
681           t0->details_valid = 0;
682         }
683       if (b1->flags & VLIB_BUFFER_IS_TRACED)
684         {
685           t1 = vlib_add_trace (vm, node, b1,
686                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
687           t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
688           t1->details_valid = 0;
689         }
690       buffers += 2;
691       n_left -= 2;
692     }
693
694   while (n_left >= 1)
695     {
696       u32 bi0;
697       vlib_buffer_t *b0;
698       vnet_error_trace_t *t0;
699
700       bi0 = buffers[0];
701
702       b0 = vlib_get_buffer (vm, bi0);
703
704       if (b0->flags & VLIB_BUFFER_IS_TRACED)
705         {
706           t0 = vlib_add_trace (vm, node, b0,
707                                STRUCT_OFFSET_OF (vnet_error_trace_t, pad));
708           t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
709           t0->details_valid = 0;
710         }
711       buffers += 1;
712       n_left -= 1;
713     }
714 }
715
/* How an errored packet is accounted: as a drop or as a punt.  Lets
 * one implementation (interface_drop_punt) serve both error nodes. */
typedef enum
{
  VNET_ERROR_DISPOSITION_DROP,
  VNET_ERROR_DISPOSITION_PUNT,
  VNET_ERROR_N_DISPOSITION,
} vnet_error_disposition_t;
722
/*
 * Build a detailed ("catch-up") trace record for a packet arriving at
 * error-drop without an earlier trace.  Temporarily rewinds the buffer
 * to its L2 header to decode the ethertype and, for IP4/IP6, the
 * src/dst addresses, then restores the original position.
 */
static void
drop_catchup_trace (vlib_main_t * vm,
		    vlib_node_runtime_t * node, vlib_buffer_t * b)
{
  /* Can we safely rewind the buffer? If not, fagedaboudit */
  if (b->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
    {
      vnet_error_trace_t *t;
      ip4_header_t *ip4;
      ip6_header_t *ip6;
      ethernet_header_t *eh;
      i16 delta;

      t = vlib_add_trace (vm, node, b, sizeof (*t));
      /* delta is negative or zero: rewind to the saved l2 header offset */
      delta = vnet_buffer (b)->l2_hdr_offset - b->current_data;
      vlib_buffer_advance (b, delta);

      eh = vlib_buffer_get_current (b);
      /* Save mactype */
      t->mactype = clib_net_to_host_u16 (eh->type);
      t->details_valid = 1;
      switch (t->mactype)
	{
	case ETHERNET_TYPE_IP4:
	  ip4 = (void *) (eh + 1);
	  t->details_valid = 2;
	  t->is_ip6 = 0;
	  t->src.ip4.as_u32 = ip4->src_address.as_u32;
	  t->dst.ip4.as_u32 = ip4->dst_address.as_u32;
	  break;

	case ETHERNET_TYPE_IP6:
	  ip6 = (void *) (eh + 1);
	  t->details_valid = 2;
	  t->is_ip6 = 1;
	  clib_memcpy_fast (t->src.as_u8, ip6->src_address.as_u8,
			    sizeof (ip6_address_t));
	  clib_memcpy_fast (t->dst.as_u8, ip6->dst_address.as_u8,
			    sizeof (ip6_address_t));
	  break;

	default:
	  /* Dunno, do nothing, leave details_valid alone */
	  break;
	}
      /* Restore current data (probably unnecessary) */
      vlib_buffer_advance (b, -delta);
    }
}
772
/*
 * Shared implementation of the error-drop and error-punt nodes.
 *
 * Counts every packet against its RX sw_if_index (and against the
 * super-interface for sub-interfaces) in the drop or punt counter
 * selected by 'disposition', handles "trace add error-drop N"
 * catch-up tracing, then enqueues all buffers to next index 0.
 * Counting is done by collecting sw_if_indices first and then
 * incrementing in runs of equal values, to amortize counter updates.
 */
static_always_inline uword
interface_drop_punt (vlib_main_t * vm,
		     vlib_node_runtime_t * node,
		     vlib_frame_t * frame,
		     vnet_error_disposition_t disposition)
{
  u32 *from, n_left, thread_index, *sw_if_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 sw_if_indices[VLIB_FRAME_SIZE];
  vlib_simple_counter_main_t *cm;
  u16 nexts[VLIB_FRAME_SIZE];
  u32 n_trace;
  vnet_main_t *vnm;

  vnm = vnet_get_main ();
  thread_index = vm->thread_index;
  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  b = bufs;
  sw_if_index = sw_if_indices;

  vlib_get_buffers (vm, from, bufs, n_left);

  /* "trace add error-drop NNN?" */
  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
    {
      /* If pkts aren't otherwise traced... */
      if ((node->flags & VLIB_NODE_FLAG_TRACE) == 0)
	{
	  /* Trace them from here */
	  node->flags |= VLIB_NODE_FLAG_TRACE;
	  while (n_trace && n_left)
	    {
	      if (PREDICT_TRUE
		  (vlib_trace_buffer (vm, node, 0 /* next_index */ , b[0],
				      0 /* follow chain */ )))
		{
		  /*
		   * Here we have a wireshark dissector problem.
		   * Packets may be well-formed, or not. We
		   * must not blow chunks in any case.
		   *
		   * Try to produce trace records which will help
		   * folks understand what's going on.
		   */
		  drop_catchup_trace (vm, node, b[0]);
		  n_trace--;
		}
	      n_left--;
	      b++;
	    }
	}

      vlib_set_trace_count (vm, node, n_trace);
      /* rewind: the counting loops below reuse b and n_left */
      b = bufs;
      n_left = frame->n_vectors;
    }

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    interface_trace_buffers (vm, node, frame);

  /* All going to drop regardless, this is just a counting exercise */
  clib_memset (nexts, 0, sizeof (nexts));

  /* Select the punt or drop counter based on disposition */
  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
			 (disposition == VNET_ERROR_DISPOSITION_PUNT
			  ? VNET_INTERFACE_COUNTER_PUNT
			  : VNET_INTERFACE_COUNTER_DROP));

  /* collect the array of interfaces first ... */
  while (n_left >= 4)
    {
      if (n_left >= 12)
	{
	  /* Prefetch 8 ahead - there's not much going on in each iteration */
	  vlib_prefetch_buffer_header (b[4], LOAD);
	  vlib_prefetch_buffer_header (b[5], LOAD);
	  vlib_prefetch_buffer_header (b[6], LOAD);
	  vlib_prefetch_buffer_header (b[7], LOAD);
	}
      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
      sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
      sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];

      sw_if_index += 4;
      n_left -= 4;
      b += 4;
    }
  while (n_left)
    {
      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];

      sw_if_index += 1;
      n_left -= 1;
      b += 1;
    }

  /* ... then count against them in blocks */
  n_left = frame->n_vectors;

  while (n_left)
    {
      vnet_sw_interface_t *sw_if0;
      u16 off, count;

      off = frame->n_vectors - n_left;

      sw_if_index = sw_if_indices + off;

      /* length of the run of identical sw_if_indices starting here */
      count = clib_count_equal_u32 (sw_if_index, n_left);
      n_left -= count;

      vlib_increment_simple_counter (cm, thread_index, sw_if_index[0], count);

      /* Increment super-interface drop/punt counters for
         sub-interfaces. */
      sw_if0 = vnet_get_sw_interface (vnm, sw_if_index[0]);
      if (sw_if0->sup_sw_if_index != sw_if_index[0])
	vlib_increment_simple_counter
	  (cm, thread_index, sw_if0->sup_sw_if_index, count);
    }

  /* nexts[] is all zeroes: everything goes to next index 0 */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);

  return frame->n_vectors;
}
900
/*
 * pcap_drop_trace: write dropped packets from frame f into the pcap
 * capture (pp->pcap_main).
 *
 * For each buffer in the frame:
 *  - skip it if its error index is in im->pcap_drop_filter_hash;
 *  - capture it if pp->pcap_sw_if_index is 0 ("any interface") or matches
 *    the buffer's RX interface, or if the optional classifier filter
 *    (pp->filter_classify_table_index) accepts it;
 *  - temporarily rewind the buffer so the capture starts at the L2 header
 *    (or at buffer start), and where space permits append
 *    "<node-name>: <error-string>" to the last buffer of the chain so the
 *    drop reason is visible in the capture; all buffer state is restored
 *    before moving on.
 *
 * @param vm  vlib main
 * @param im  interface main (holds the drop-filter hash)
 * @param pp  pcap capture state/configuration
 * @param f   frame of buffer indices headed for drop
 */
static inline void
pcap_drop_trace (vlib_main_t * vm,
                 vnet_interface_main_t * im,
                 vnet_pcap_t * pp, vlib_frame_t * f)
{
  u32 *from;
  u32 n_left = f->n_vectors;
  vlib_buffer_t *b0, *p1;
  u32 bi0;
  i16 save_current_data;
  u16 save_current_length;
  vlib_error_main_t *em = &vm->error_main;
  int do_trace = 0;


  from = vlib_frame_vector_args (f);

  while (n_left > 0)
    {
      /* Prefetch the next buffer header while working on this one */
      if (PREDICT_TRUE (n_left > 1))
        {
          p1 = vlib_get_buffer (vm, from[1]);
          vlib_prefetch_buffer_header (p1, LOAD);
        }

      bi0 = from[0];
      b0 = vlib_get_buffer (vm, bi0);
      from++;
      n_left--;

      /* See if we're pointedly ignoring this specific error */
      if (im->pcap_drop_filter_hash
          && hash_get (im->pcap_drop_filter_hash, b0->error))
        continue;

      /* Capture everything (sw_if_index 0) or only the configured rx
         interface */
      do_trace = (pp->pcap_sw_if_index == 0) ||
        pp->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_RX];

      /* No direct match: give the optional classifier filter a chance */
      if (PREDICT_FALSE
          (do_trace == 0 && pp->filter_classify_table_index != ~0))
        {
          do_trace = vnet_is_packet_traced_inline
            (b0, pp->filter_classify_table_index, 0 /* full classify */ );
        }

      /* Trace all drops, or drops received on a specific interface */
      if (do_trace)
        {
          /* Saved so the buffer can be restored after the capture */
          save_current_data = b0->current_data;
          save_current_length = b0->current_length;

          /*
           * Typically, we'll need to rewind the buffer
           * if l2_hdr_offset is valid, make sure to rewind to the start of
           * the L2 header. This may not be the buffer start in case we pop-ed
           * vlan tags.
           * Otherwise, rewind to buffer start and hope for the best.
           */
          if (b0->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
            {
              if (b0->current_data > vnet_buffer (b0)->l2_hdr_offset)
                vlib_buffer_advance (b0,
                                     vnet_buffer (b0)->l2_hdr_offset -
                                     b0->current_data);
            }
          else if (b0->current_data > 0)
            vlib_buffer_advance (b0, (word) - b0->current_data);

          {
            vlib_buffer_t *last = b0;
            u32 error_node_index;
            int drop_string_len;
            vlib_node_t *n;
            /* Length of the error string */
            int error_string_len =
              clib_strnlen (em->counters_heap[b0->error].name, 128);

            /* Dig up the drop node */
            error_node_index = vm->node_main.node_by_error[b0->error];
            n = vlib_get_node (vm, error_node_index);

            /* Length of full drop string, w/ "nodename: " prepended */
            drop_string_len = error_string_len + vec_len (n->name) + 2;

            /* Find the last buffer in the chain */
            while (last->flags & VLIB_BUFFER_NEXT_PRESENT)
              last = vlib_get_buffer (vm, last->next_buffer);

            /*
             * Append <nodename>: <error-string> to the capture,
             * only if we can do that without allocating a new buffer.
             */
            if (PREDICT_TRUE ((last->current_data + last->current_length)
                              < (VLIB_BUFFER_DEFAULT_DATA_SIZE
                                 - drop_string_len)))
              {
                /* "<nodename>" then ": " then "<error-string>" */
                clib_memcpy_fast (last->data + last->current_data +
                                  last->current_length, n->name,
                                  vec_len (n->name));
                clib_memcpy_fast (last->data + last->current_data +
                                  last->current_length + vec_len (n->name),
                                  ": ", 2);
                clib_memcpy_fast (last->data + last->current_data +
                                  last->current_length + vec_len (n->name) +
                                  2, em->counters_heap[b0->error].name,
                                  error_string_len);
                /* Temporarily extend the chain so the capture includes the
                   drop string, then undo the extension and restore the
                   buffer's original position/length */
                last->current_length += drop_string_len;
                b0->flags &= ~(VLIB_BUFFER_TOTAL_LENGTH_VALID);
                pcap_add_buffer (&pp->pcap_main, vm, bi0,
                                 pp->max_bytes_per_pkt);
                last->current_length -= drop_string_len;
                b0->current_data = save_current_data;
                b0->current_length = save_current_length;
                continue;
              }
          }

          /*
           * Didn't have space in the last buffer, here's the dropped
           * packet as-is
           */
          pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);

          b0->current_data = save_current_data;
          b0->current_length = save_current_length;
        }
    }
}
1029
1030 #ifndef CLIB_MARCH_VARIANT
1031 void
1032 vnet_pcap_drop_trace_filter_add_del (u32 error_index, int is_add)
1033 {
1034   vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
1035
1036   if (im->pcap_drop_filter_hash == 0)
1037     im->pcap_drop_filter_hash = hash_create (0, sizeof (uword));
1038
1039   if (is_add)
1040     hash_set (im->pcap_drop_filter_hash, error_index, 1);
1041   else
1042     hash_unset (im->pcap_drop_filter_hash, error_index);
1043 }
1044 #endif /* CLIB_MARCH_VARIANT */
1045
1046 VLIB_NODE_FN (interface_drop) (vlib_main_t * vm,
1047                                vlib_node_runtime_t * node,
1048                                vlib_frame_t * frame)
1049 {
1050   vnet_interface_main_t *im = &vnet_get_main ()->interface_main;
1051   vnet_pcap_t *pp = &vlib_global_main.pcap;
1052
1053   if (PREDICT_FALSE (pp->pcap_drop_enable))
1054     pcap_drop_trace (vm, im, pp, frame);
1055
1056   return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_DROP);
1057 }
1058
/*
 * error-punt node function: run the common drop/punt path with the PUNT
 * disposition, so packets are accounted as punts rather than drops.
 */
VLIB_NODE_FN (interface_punt) (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
}
1065
/* *INDENT-OFF* */
/*
 * "error-drop" node registration: single next node "drop"; tracing
 * supported, with drop reasons formatted by format_vnet_error_trace.
 */
VLIB_REGISTER_NODE (interface_drop) = {
  .name = "error-drop",
  .vector_size = sizeof (u32),
  .format_trace = format_vnet_error_trace,
  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "drop",
  },
};
/* *INDENT-ON* */
1078
/* *INDENT-OFF* */
/*
 * "error-punt" node registration: single next node "punt"; tracing
 * supported, with error reasons formatted by format_vnet_error_trace.
 */
VLIB_REGISTER_NODE (interface_punt) = {
  .name = "error-punt",
  .vector_size = sizeof (u32),
  .format_trace = format_vnet_error_trace,
  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "punt",
  },
};
/* *INDENT-ON* */
1091
/* *INDENT-OFF* */
/*
 * "interface-output" node registration.  Next nodes are added at runtime,
 * one per hw-interface output node (see
 * vnet_per_buffer_interface_output_hw_interface_add_del and
 * vnet_set_interface_output_node below).
 */
VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = {
  .name = "interface-output",
  .vector_size = sizeof (u32),
};
/* *INDENT-ON* */
1098
1099 static uword
1100 interface_tx_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1101                       vlib_frame_t * from_frame)
1102 {
1103   vnet_main_t *vnm = vnet_get_main ();
1104   u32 last_sw_if_index = ~0;
1105   vlib_frame_t *to_frame = 0;
1106   vnet_hw_interface_t *hw = 0;
1107   u32 *from, *to_next = 0;
1108   u32 n_left_from;
1109
1110   from = vlib_frame_vector_args (from_frame);
1111   n_left_from = from_frame->n_vectors;
1112   while (n_left_from > 0)
1113     {
1114       u32 bi0;
1115       vlib_buffer_t *b0;
1116       u32 sw_if_index0;
1117
1118       bi0 = from[0];
1119       from++;
1120       n_left_from--;
1121       b0 = vlib_get_buffer (vm, bi0);
1122       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
1123
1124       if (PREDICT_FALSE ((last_sw_if_index != sw_if_index0) || to_frame == 0))
1125         {
1126           if (to_frame)
1127             {
1128               hw = vnet_get_sup_hw_interface (vnm, last_sw_if_index);
1129               vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
1130             }
1131           last_sw_if_index = sw_if_index0;
1132           hw = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1133           to_frame = vlib_get_frame_to_node (vm, hw->tx_node_index);
1134           to_next = vlib_frame_vector_args (to_frame);
1135         }
1136
1137       to_next[0] = bi0;
1138       to_next++;
1139       to_frame->n_vectors++;
1140     }
1141   vlib_put_frame_to_node (vm, hw->tx_node_index, to_frame);
1142   return from_frame->n_vectors;
1143 }
1144
/* *INDENT-OFF* */
/*
 * "interface-tx" node registration: driven by interface_tx_node_fn; its
 * only static next node is "error-drop".
 */
VLIB_REGISTER_NODE (interface_tx) = {
  .function = interface_tx_node_fn,
  .name = "interface-tx",
  .vector_size = sizeof (u32),
  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};
1155
/*
 * "interface-output" feature arc.  Output features registered on this
 * arc run before "interface-tx", the last node in the arc; the arc index
 * is stored in vnet_main.interface_main.output_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (interface_output, static) =
{
  .arc_name  = "interface-output",
  .start_nodes = VNET_FEATURES (0),
  .last_in_arc = "interface-tx",
  .arc_index_ptr = &vnet_main.interface_main.output_feature_arc_index,
};
1163
/* "span-output" runs on the interface-output arc, before interface-tx */
VNET_FEATURE_INIT (span_tx, static) = {
  .arc_name = "interface-output",
  .node_name = "span-output",
  .runs_before = VNET_FEATURES ("interface-tx"),
};
1169
/* "ipsec-if-output" runs on the interface-output arc, before interface-tx */
VNET_FEATURE_INIT (ipsec_if_tx, static) = {
  .arc_name = "interface-output",
  .node_name = "ipsec-if-output",
  .runs_before = VNET_FEATURES ("interface-tx"),
};
1175
/* "interface-tx" is the terminal feature on the arc (runs_before = 0) */
VNET_FEATURE_INIT (interface_tx, static) = {
  .arc_name = "interface-output",
  .node_name = "interface-tx",
  .runs_before = 0,
};
/* *INDENT-ON* */
1182
1183 #ifndef CLIB_MARCH_VARIANT
1184 clib_error_t *
1185 vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm,
1186                                                        u32 hw_if_index,
1187                                                        u32 is_create)
1188 {
1189   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
1190   u32 next_index;
1191
1192   if (hi->output_node_index == 0)
1193     return 0;
1194
1195   next_index = vlib_node_add_next
1196     (vnm->vlib_main, vnet_per_buffer_interface_output_node.index,
1197      hi->output_node_index);
1198   hi->output_node_next_index = next_index;
1199
1200   return 0;
1201 }
1202
1203 VNET_HW_INTERFACE_ADD_DEL_FUNCTION
1204   (vnet_per_buffer_interface_output_hw_interface_add_del);
1205
1206 void
1207 vnet_set_interface_output_node (vnet_main_t * vnm,
1208                                 u32 hw_if_index, u32 node_index)
1209 {
1210   ASSERT (node_index);
1211   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
1212   u32 next_index = vlib_node_add_next
1213     (vnm->vlib_main, vnet_per_buffer_interface_output_node.index, node_index);
1214   hi->output_node_next_index = next_index;
1215   hi->output_node_index = node_index;
1216 }
1217 #endif /* CLIB_MARCH_VARIANT */
1218
1219 /*
1220  * fd.io coding-style-patch-verification: ON
1221  *
1222  * Local Variables:
1223  * eval: (c-set-style "gnu")
1224  * End:
1225  */