gso: add checksum validation in gro
vpp.git: src/vnet/gso/node.c
/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vppinfra/error.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/feature/feature.h>
#include <vnet/gso/gso.h>
#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>

typedef struct
{
  u32 flags;
  u16 gso_size;
  u8 gso_l4_hdr_sz;
  generic_header_offset_t gho;
} gso_trace_t;

static u8 *
format_gso_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  gso_trace_t *t = va_arg (*args, gso_trace_t *);

  if (t->flags & VNET_BUFFER_F_GSO)
    {
      s = format (s, "gso_sz %d gso_l4_hdr_sz %d\n%U",
                  t->gso_size, t->gso_l4_hdr_sz, format_generic_header_offset,
                  &t->gho);
    }
  else
    {
      s =
        format (s, "non-gso buffer\n%U", format_generic_header_offset,
                &t->gho);
    }

  return s;
}

static_always_inline u16
tso_segment_ipip_tunnel_fixup (vlib_main_t * vm,
                               vnet_interface_per_thread_data_t * ptd,
                               vlib_buffer_t * sb0,
                               generic_header_offset_t * gho)
{
  u16 n_tx_bufs = vec_len (ptd->split_buffers);
  u16 i = 0, n_tx_bytes = 0;

  while (i < n_tx_bufs)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
      vnet_get_outer_header (b0, gho);
      clib_memcpy_fast (vlib_buffer_get_current (b0),
                        vlib_buffer_get_current (sb0), gho->outer_hdr_sz);

      ip4_header_t *ip4 =
        (ip4_header_t *) (vlib_buffer_get_current (b0) +
                          gho->outer_l3_hdr_offset);
      ip6_header_t *ip6 =
        (ip6_header_t *) (vlib_buffer_get_current (b0) +
                          gho->outer_l3_hdr_offset);

      if (gho->gho_flags & GHO_F_OUTER_IP4)
        {
          ip4->length =
            clib_host_to_net_u16 (b0->current_length -
                                  gho->outer_l3_hdr_offset);
          ip4->checksum = ip4_header_checksum (ip4);
        }
      else if (gho->gho_flags & GHO_F_OUTER_IP6)
        {
          ip6->payload_length =
            clib_host_to_net_u16 (b0->current_length -
                                  gho->outer_l4_hdr_offset);
        }

      n_tx_bytes += gho->outer_hdr_sz;
      i++;
    }
  return n_tx_bytes;
}

static_always_inline void
tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
                                        generic_header_offset_t * gho)
{
  u8 proto = 0;
  ip4_header_t *ip4 = 0;
  ip6_header_t *ip6 = 0;
  udp_header_t *udp = 0;

  ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  udp =
    (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);

  if (gho->gho_flags & GHO_F_OUTER_IP4)
    {
      proto = ip4->protocol;
      ip4->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
    }
  else if (gho->gho_flags & GHO_F_OUTER_IP6)
    {
      proto = ip6->protocol;
      ip6->payload_length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
    }
  if (proto == IP_PROTOCOL_UDP)
    {
      int bogus;
      udp->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
      udp->checksum = 0;
      if (gho->gho_flags & GHO_F_OUTER_IP6)
        {
          udp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
        }
      else if (gho->gho_flags & GHO_F_OUTER_IP4)
        {
          udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
        }
      b->flags &= ~VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
    }
}

static_always_inline u16
tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
                                vnet_interface_per_thread_data_t * ptd,
                                vlib_buffer_t * sb0,
                                generic_header_offset_t * gho)
{
  u16 n_tx_bufs = vec_len (ptd->split_buffers);
  u16 i = 0, n_tx_bytes = 0;

  while (i < n_tx_bufs)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
      vnet_get_outer_header (b0, gho);
      clib_memcpy_fast (vlib_buffer_get_current (b0),
                        vlib_buffer_get_current (sb0), gho->outer_hdr_sz);

      tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
      n_tx_bytes += gho->outer_hdr_sz;
      i++;
    }
  return n_tx_bytes;
}

static_always_inline u16
tso_alloc_tx_bufs (vlib_main_t * vm,
                   vnet_interface_per_thread_data_t * ptd,
                   vlib_buffer_t * b0, u32 n_bytes_b0, u16 l234_sz,
                   u16 gso_size, u16 first_data_size,
                   generic_header_offset_t * gho)
{
  u16 n_alloc, size;
  u16 first_packet_length = l234_sz + first_data_size;

  /*
   * size is the amount of data per segmented buffer, except for the 1st
   * segmented buffer.
   * l2_hdr_offset is an offset == current_data of vlib_buffer_t.
   * l234_sz is hdr_sz from l2_hdr_offset.
   */
  size =
    clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - l234_sz
              - gho->l2_hdr_offset);

  /*
   * The length of the first segmented buffer is calculated separately,
   * as it may contain less data than gso_size (when gso_size is greater
   * than the current_length of the 1st buffer in the GSO chain) and/or
   * than the size calculated above.
   */
  u16 n_bufs = 1;

  /*
   * Total packet length minus the first packet length (which includes
   * the l234 header), using rounded-up division.
   */
  ASSERT (n_bytes_b0 > first_packet_length);
  n_bufs += ((n_bytes_b0 - first_packet_length + (size - 1)) / size);
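  /*
   * Illustrative arithmetic (hypothetical numbers, not taken from the
   * code): with n_bytes_b0 = 9014, l234_sz = 66, gso_size = 1448 and
   * size = 1448, first_packet_length = 66 + 1448 = 1514, so
   * n_bufs = 1 + (9014 - 1514 + 1447) / 1448 = 1 + 6 = 7 segments.
   */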

  vec_validate (ptd->split_buffers, n_bufs - 1);

  n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
  if (n_alloc < n_bufs)
    {
      vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
      return 0;
    }
  return n_alloc;
}

static_always_inline void
tso_init_buf_from_template_base (vlib_buffer_t * nb0, vlib_buffer_t * b0,
                                 u32 flags, u16 length)
{
  /* copying objects from cacheline 0 */
  nb0->current_data = b0->current_data;
  nb0->current_length = length;
  nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
  nb0->flow_id = b0->flow_id;
  nb0->error = b0->error;
  nb0->current_config_index = b0->current_config_index;
  clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque));

  /* copying objects from cacheline 1 */
  nb0->trace_handle = b0->trace_handle;
  nb0->total_length_not_including_first_buffer = 0;

  /* copying data */
  clib_memcpy_fast (vlib_buffer_get_current (nb0),
                    vlib_buffer_get_current (b0), length);
}

static_always_inline void
tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0,
                            vlib_buffer_t * b0, u16 template_data_sz,
                            u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left,
                            u32 next_tcp_seq, u32 flags,
                            generic_header_offset_t * gho)
{
  tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz);

  *p_dst_left =
    clib_min (gso_size,
              vlib_buffer_get_default_data_size (vm) - (template_data_sz +
                                                        nb0->current_data));
  *p_dst_ptr = vlib_buffer_get_current (nb0) + template_data_sz;

  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (nb0) + gho->l4_hdr_offset);
  tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
}

static_always_inline void
tso_fixup_segmented_buf (vlib_main_t * vm, vlib_buffer_t * b0, u8 tcp_flags,
                         int is_l2, int is_ip6, generic_header_offset_t * gho)
{
  ip4_header_t *ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  ip6_header_t *ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset);

  tcp->flags = tcp_flags;

  if (is_ip6)
    {
      ip6->payload_length =
        clib_host_to_net_u16 (b0->current_length - gho->l4_hdr_offset);
      if (gho->gho_flags & GHO_F_TCP)
        {
          int bogus = 0;
          tcp->checksum = 0;
          tcp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
          b0->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
        }
    }
  else
    {
      ip4->length =
        clib_host_to_net_u16 (b0->current_length - gho->l3_hdr_offset);
      if (gho->gho_flags & GHO_F_IP4)
        ip4->checksum = ip4_header_checksum (ip4);
      if (gho->gho_flags & GHO_F_TCP)
        {
          tcp->checksum = 0;
          tcp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip4);
        }
      b0->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
      b0->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
    }

  if (!is_l2 && ((gho->gho_flags & GHO_F_TUNNEL) == 0))
    {
      u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];

      ip_adjacency_t *adj0 = adj_get (adj_index0);

      if (adj0->lookup_next_index == IP_LOOKUP_NEXT_MIDCHAIN &&
          adj0->sub_type.midchain.fixup_func)
        /* calls e.g. ipip44_fixup */
        adj0->sub_type.midchain.fixup_func
          (vm, adj0, b0, adj0->sub_type.midchain.fixup_data);
    }
}

/**
 * Allocate the necessary number of ptd->split_buffers,
 * and segment the possibly chained buffer(s) from b0 into
 * them.
 *
 * Return the cumulative number of bytes sent, or zero
 * if allocation failed.
 */
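/*
 * Illustration (hypothetical numbers): a GSO buffer carrying 4000 payload
 * bytes with gso_size 1448 and initial TCP sequence number S is split into
 * segments carrying 1448, 1448 and 1104 payload bytes with sequence numbers
 * S, S + 1448 and S + 2896 respectively; FIN and PSH are cleared on all but
 * the last segment, which keeps the original TCP flags.
 */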

static_always_inline u32
tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd,
                    u32 sbi0, vlib_buffer_t * sb0,
                    generic_header_offset_t * gho, u32 n_bytes_b0, int is_l2,
                    int is_ip6)
{
  u32 n_tx_bytes = 0;
  u16 gso_size = vnet_buffer2 (sb0)->gso_size;

  u8 save_tcp_flags = 0;
  u8 tcp_flags_no_fin_psh = 0;
  u32 next_tcp_seq = 0;

  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (sb0) + gho->l4_hdr_offset);
  next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
  /* store original flags for last packet and reset FIN and PSH */
  save_tcp_flags = tcp->flags;
  tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH);
  tcp->checksum = 0;

  u32 default_bflags =
    sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
  u16 l234_sz = gho->hdr_sz;
  int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz);
  next_tcp_seq += first_data_size;

  if (PREDICT_FALSE
      (!tso_alloc_tx_bufs
       (vm, ptd, sb0, n_bytes_b0, l234_sz, gso_size, first_data_size, gho)))
    return 0;

  vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]);
  tso_init_buf_from_template_base (b0, sb0, default_bflags,
                                   l234_sz + first_data_size);

  u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size;
  if (total_src_left)
    {
      /* Need to copy more segments */
      u8 *src_ptr, *dst_ptr;
      u16 src_left, dst_left;
      /* current source buffer */
      vlib_buffer_t *csb0 = sb0;
      u32 csbi0 = sbi0;
      /* current dest buffer */
      vlib_buffer_t *cdb0;
      u16 dbi = 1;              /* the buffer [0] is b0 */

      src_ptr = vlib_buffer_get_current (sb0) + l234_sz + first_data_size;
      src_left = sb0->current_length - l234_sz - first_data_size;

      tso_fixup_segmented_buf (vm, b0, tcp_flags_no_fin_psh, is_l2, is_ip6,
                               gho);

      /* grab a second buffer and prepare the loop */
      ASSERT (dbi < vec_len (ptd->split_buffers));
      cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
      tso_init_buf_from_template (vm, cdb0, b0, l234_sz, gso_size, &dst_ptr,
                                  &dst_left, next_tcp_seq, default_bflags,
                                  gho);

      /* an arbitrarily large number to catch runaway loops */
      int nloops = 2000;
      while (total_src_left)
        {
          if (nloops-- <= 0)
            clib_panic ("infinite loop detected");
          u16 bytes_to_copy = clib_min (src_left, dst_left);

          clib_memcpy_fast (dst_ptr, src_ptr, bytes_to_copy);

          src_left -= bytes_to_copy;
          src_ptr += bytes_to_copy;
          total_src_left -= bytes_to_copy;
          dst_left -= bytes_to_copy;
          dst_ptr += bytes_to_copy;
          next_tcp_seq += bytes_to_copy;
          cdb0->current_length += bytes_to_copy;

          if (0 == src_left)
            {
              int has_next = (csb0->flags & VLIB_BUFFER_NEXT_PRESENT);
              u32 next_bi = csb0->next_buffer;

              /* init src to the next buffer in chain */
              if (has_next)
                {
                  csbi0 = next_bi;
                  csb0 = vlib_get_buffer (vm, csbi0);
                  src_left = csb0->current_length;
                  src_ptr = vlib_buffer_get_current (csb0);
                }
              else
                {
                  ASSERT (total_src_left == 0);
                  break;
                }
            }
          if (0 == dst_left && total_src_left)
            {
              n_tx_bytes += cdb0->current_length;
              tso_fixup_segmented_buf (vm, cdb0, tcp_flags_no_fin_psh, is_l2,
                                       is_ip6, gho);
              ASSERT (dbi < vec_len (ptd->split_buffers));
              cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
              tso_init_buf_from_template (vm, cdb0, b0, l234_sz,
                                          gso_size, &dst_ptr, &dst_left,
                                          next_tcp_seq, default_bflags, gho);
            }
        }

      tso_fixup_segmented_buf (vm, cdb0, save_tcp_flags, is_l2, is_ip6, gho);

      n_tx_bytes += cdb0->current_length;
    }
  n_tx_bytes += b0->current_length;
  return n_tx_bytes;
}

static_always_inline void
drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm,
                           vlib_node_runtime_t * node, u32 * pbi0,
                           u32 sw_if_index, u32 drop_error_code)
{
  u32 thread_index = vm->thread_index;

  vlib_simple_counter_main_t *cm;
  cm =
    vec_elt_at_index (vnm->interface_main.sw_if_counters,
                      VNET_INTERFACE_COUNTER_TX_ERROR);
  vlib_increment_simple_counter (cm, thread_index, sw_if_index, 1);

  vlib_error_drop_buffers (vm, node, pbi0,
                           /* buffer stride */ 1,
                           /* n_buffers */ 1,
                           VNET_INTERFACE_OUTPUT_NEXT_DROP,
                           node->node_index, drop_error_code);
}

static_always_inline uword
vnet_gso_node_inline (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame,
                      vnet_main_t * vnm,
                      vnet_hw_interface_t * hi,
                      int is_l2, int is_ip4, int is_ip6, int do_segmentation)
{
  u32 *to_next;
  u32 next_index = node->cached_next_index;
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 *from_end = from + n_left_from;
  u32 thread_index = vm->thread_index;
  vnet_interface_main_t *im = &vnm->interface_main;
  vnet_interface_per_thread_data_t *ptd =
    vec_elt_at_index (im->per_thread_data, thread_index);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;

  vlib_get_buffers (vm, from, b, n_left_from);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      if (!do_segmentation)
        while (from + 8 <= from_end && n_left_to_next >= 4)
          {
            u32 bi0, bi1, bi2, bi3;
            u32 next0, next1, next2, next3;
            u32 swif0, swif1, swif2, swif3;
            gso_trace_t *t0, *t1, *t2, *t3;
            vnet_hw_interface_t *hi0, *hi1, *hi2, *hi3;

            /* Prefetch next iteration. */
            vlib_prefetch_buffer_header (b[4], LOAD);
            vlib_prefetch_buffer_header (b[5], LOAD);
            vlib_prefetch_buffer_header (b[6], LOAD);
            vlib_prefetch_buffer_header (b[7], LOAD);

            bi0 = from[0];
            bi1 = from[1];
            bi2 = from[2];
            bi3 = from[3];
            to_next[0] = bi0;
            to_next[1] = bi1;
            to_next[2] = bi2;
            to_next[3] = bi3;

            swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
            swif1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
            swif2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
            swif3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];

            if (PREDICT_FALSE (hi->sw_if_index != swif0))
              {
                hi0 = vnet_get_sup_hw_interface (vnm, swif0);
                if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                    (b[0]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif1))
              {
                hi1 = vnet_get_sup_hw_interface (vnm, swif1);
                if (!(hi1->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) &&
                    (b[1]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif2))
              {
                hi2 = vnet_get_sup_hw_interface (vnm, swif2);
                if ((hi2->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                    (b[2]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif3))
              {
                hi3 = vnet_get_sup_hw_interface (vnm, swif3);
                if (!(hi3->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) &&
                    (b[3]->flags & VNET_BUFFER_F_GSO))
                  break;
              }

            if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
                t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
                t0->gso_size = vnet_buffer2 (b[0])->gso_size;
                t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
                                                   is_ip4, is_ip6);
              }
            if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t1 = vlib_add_trace (vm, node, b[1], sizeof (t1[0]));
                t1->flags = b[1]->flags & VNET_BUFFER_F_GSO;
                t1->gso_size = vnet_buffer2 (b[1])->gso_size;
                t1->gso_l4_hdr_sz = vnet_buffer2 (b[1])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[1], &t1->gho, is_l2,
                                                   is_ip4, is_ip6);
              }
            if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t2 = vlib_add_trace (vm, node, b[2], sizeof (t2[0]));
                t2->flags = b[2]->flags & VNET_BUFFER_F_GSO;
                t2->gso_size = vnet_buffer2 (b[2])->gso_size;
                t2->gso_l4_hdr_sz = vnet_buffer2 (b[2])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[2], &t2->gho, is_l2,
                                                   is_ip4, is_ip6);
              }
            if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t3 = vlib_add_trace (vm, node, b[3], sizeof (t3[0]));
                t3->flags = b[3]->flags & VNET_BUFFER_F_GSO;
                t3->gso_size = vnet_buffer2 (b[3])->gso_size;
                t3->gso_l4_hdr_sz = vnet_buffer2 (b[3])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[3], &t3->gho, is_l2,
                                                   is_ip4, is_ip6);
              }

            from += 4;
            to_next += 4;
            n_left_to_next -= 4;
            n_left_from -= 4;

            next0 = next1 = 0;
            next2 = next3 = 0;
            vnet_feature_next (&next0, b[0]);
            vnet_feature_next (&next1, b[1]);
            vnet_feature_next (&next2, b[2]);
            vnet_feature_next (&next3, b[3]);
            vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, bi1, bi2,
                                             bi3, next0, next1, next2, next3);
            b += 4;
          }

      while (from + 1 <= from_end && n_left_to_next > 0)
        {
          u32 bi0, swif0;
          gso_trace_t *t0;
          vnet_hw_interface_t *hi0;
          u32 next0 = 0;
          u32 do_segmentation0 = 0;

          swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
          if (PREDICT_FALSE (hi->sw_if_index != swif0))
            {
              hi0 = vnet_get_sup_hw_interface (vnm, swif0);
              if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                  (b[0]->flags & VNET_BUFFER_F_GSO))
                do_segmentation0 = 1;
            }
          else
            do_segmentation0 = do_segmentation;

          /* speculatively enqueue b0 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next += 1;
          n_left_to_next -= 1;
          from += 1;
          n_left_from -= 1;

          if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
            {
              t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
              t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
              t0->gso_size = vnet_buffer2 (b[0])->gso_size;
              t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
              vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
                                                 is_ip4, is_ip6);
            }

          if (do_segmentation0)
            {
              if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_GSO))
                {
                  /*
                   * Undo the enqueue of b0 - it is not going anywhere,
                   * and will be freed either after it's segmented or
                   * when dropped, if there are no buffers to segment into.
                   */
                  to_next -= 1;
                  n_left_to_next += 1;
                  /* undo the counting. */
                  generic_header_offset_t gho = { 0 };
                  u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
                  u32 n_tx_bytes = 0;
                  u32 inner_is_ip6 = is_ip6;

                  vnet_generic_header_offset_parser (b[0], &gho, is_l2,
                                                     is_ip4, is_ip6);

                  if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL))
                    {
                      if (PREDICT_FALSE
                          (gho.gho_flags & (GHO_F_GRE_TUNNEL |
                                            GHO_F_GENEVE_TUNNEL)))
                        {
                          /* not supported yet */
                          drop_one_buffer_and_count (vm, vnm, node, from - 1,
                                                     hi->sw_if_index,
                                                     VNET_INTERFACE_OUTPUT_ERROR_UNHANDLED_GSO_TYPE);
                          b += 1;
                          continue;
                        }

                      vnet_get_inner_header (b[0], &gho);

                      n_bytes_b0 -= gho.outer_hdr_sz;
                      inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0;
                    }

                  n_tx_bytes =
                    tso_segment_buffer (vm, ptd, bi0, b[0], &gho, n_bytes_b0,
                                        is_l2, inner_is_ip6);

                  if (PREDICT_FALSE (n_tx_bytes == 0))
                    {
                      drop_one_buffer_and_count (vm, vnm, node, from - 1,
                                                 hi->sw_if_index,
                                                 VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO);
                      b += 1;
                      continue;
                    }


                  if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL))
                    {
                      vnet_get_outer_header (b[0], &gho);
                      n_tx_bytes +=
                        tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho);
                    }
                  else
                    if (PREDICT_FALSE
                        (gho.gho_flags & (GHO_F_IPIP_TUNNEL |
                                          GHO_F_IPIP6_TUNNEL)))
                    {
                      vnet_get_outer_header (b[0], &gho);
                      n_tx_bytes +=
                        tso_segment_ipip_tunnel_fixup (vm, ptd, b[0], &gho);
                    }

                  u16 n_tx_bufs = vec_len (ptd->split_buffers);
                  u32 *from_seg = ptd->split_buffers;

                  while (n_tx_bufs > 0)
                    {
                      u32 sbi0;
                      vlib_buffer_t *sb0;
                      while (n_tx_bufs > 0 && n_left_to_next > 0)
                        {
                          sbi0 = to_next[0] = from_seg[0];
                          sb0 = vlib_get_buffer (vm, sbi0);
                          ASSERT (sb0->current_length > 0);
                          to_next += 1;
                          from_seg += 1;
                          n_left_to_next -= 1;
                          n_tx_bufs -= 1;
                          next0 = 0;
                          vnet_feature_next (&next0, sb0);
                          vlib_validate_buffer_enqueue_x1 (vm, node,
                                                           next_index,
                                                           to_next,
                                                           n_left_to_next,
                                                           sbi0, next0);
                        }
                      vlib_put_next_frame (vm, node, next_index,
                                           n_left_to_next);
                      if (n_tx_bufs > 0)
                        vlib_get_next_frame (vm, node, next_index,
                                             to_next, n_left_to_next);
                    }
                  /* The buffers were enqueued. Reset the length */
                  _vec_len (ptd->split_buffers) = 0;
                  /* Free the now segmented buffer */
                  vlib_buffer_free_one (vm, bi0);
                  b += 1;
                  continue;
                }
            }

          vnet_feature_next (&next0, b[0]);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
          b += 1;
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

static_always_inline uword
vnet_gso_inline (vlib_main_t * vm,
                 vlib_node_runtime_t * node, vlib_frame_t * frame, int is_l2,
                 int is_ip4, int is_ip6)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_hw_interface_t *hi;

  if (frame->n_vectors > 0)
    {
      u32 *from = vlib_frame_vector_args (frame);
      vlib_buffer_t *b = vlib_get_buffer (vm, from[0]);
      hi = vnet_get_sup_hw_interface (vnm,
                                      vnet_buffer (b)->sw_if_index[VLIB_TX]);

      if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
        return vnet_gso_node_inline (vm, node, frame, vnm, hi,
                                     is_l2, is_ip4, is_ip6,
                                     /* do_segmentation */ 0);
      else
        return vnet_gso_node_inline (vm, node, frame, vnm, hi,
                                     is_l2, is_ip4, is_ip6,
                                     /* do_segmentation */ 1);
    }
  return 0;
}

VLIB_NODE_FN (gso_l2_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 1 /* l2 */ , 1 /* ip4 */ ,
                          0 /* ip6 */ );
}

VLIB_NODE_FN (gso_l2_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 1 /* l2 */ , 0 /* ip4 */ ,
                          1 /* ip6 */ );
}

VLIB_NODE_FN (gso_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 0 /* l2 */ , 1 /* ip4 */ ,
                          0 /* ip6 */ );
}

VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 0 /* l2 */ , 0 /* ip4 */ ,
                          1 /* ip6 */ );
}

/* *INDENT-OFF* */

VLIB_REGISTER_NODE (gso_l2_ip4_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-l2-ip4",
};

VLIB_REGISTER_NODE (gso_l2_ip6_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-l2-ip6",
};

VLIB_REGISTER_NODE (gso_ip4_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-ip4",
};

VLIB_REGISTER_NODE (gso_ip6_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-ip6",
};

VNET_FEATURE_INIT (gso_l2_ip4_node, static) = {
  .arc_name = "l2-output-ip4",
  .node_name = "gso-l2-ip4",
  .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};

VNET_FEATURE_INIT (gso_l2_ip6_node, static) = {
  .arc_name = "l2-output-ip6",
  .node_name = "gso-l2-ip6",
  .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};

VNET_FEATURE_INIT (gso_ip4_node, static) = {
  .arc_name = "ip4-output",
  .node_name = "gso-ip4",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (gso_ip6_node, static) = {
  .arc_name = "ip6-output",
  .node_name = "gso-ip6",
  .runs_before = VNET_FEATURES ("ipsec6-output-feature"),
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */