gso: add vxlan tunnel support
src/vnet/gso/node.c
/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vppinfra/error.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/feature/feature.h>
#include <vnet/gso/gso.h>
#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>

typedef struct
{
  u32 flags;
  u16 gso_size;
  u8 gso_l4_hdr_sz;
  generic_header_offset_t gho;
} gso_trace_t;

static u8 *
format_gso_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  gso_trace_t *t = va_arg (*args, gso_trace_t *);

  if (t->flags & VNET_BUFFER_F_GSO)
    {
      s = format (s, "gso_sz %d gso_l4_hdr_sz %d %U",
                  t->gso_size, t->gso_l4_hdr_sz, format_generic_header_offset,
                  &t->gho);
    }
  else
    {
      s =
        format (s, "non-gso buffer %U", format_generic_header_offset,
                &t->gho);
    }

  return s;
}

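/*
 * Rewrite the outer (tunnel) headers of one segmented buffer: refresh the
 * outer IPv4/IPv6 length field (and the IPv4 header checksum), then, when
 * the outer L4 protocol is UDP, recompute the outer UDP length and checksum.
 * Called once per segment produced for a VXLAN-encapsulated GSO packet.
 */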
static_always_inline void
tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
                                        generic_header_offset_t * gho)
{
  u8 proto = 0;
  ip4_header_t *ip4 = 0;
  ip6_header_t *ip6 = 0;
  udp_header_t *udp = 0;

  ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  udp =
    (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);

  if (gho->gho_flags & GHO_F_OUTER_IP4)
    {
      proto = ip4->protocol;
      ip4->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
    }
  else if (gho->gho_flags & GHO_F_OUTER_IP6)
    {
      proto = ip6->protocol;
      ip6->payload_length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
    }
  if (proto == IP_PROTOCOL_UDP)
    {
      int bogus;
      udp->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
      udp->checksum = 0;
      if (gho->gho_flags & GHO_F_OUTER_IP6)
        {
          udp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
        }
      else if (gho->gho_flags & GHO_F_OUTER_IP4)
        {
          udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
        }
      b->flags &= ~VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
    }
}

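/*
 * Copy the saved outer (Ethernet/IP/UDP/VXLAN) header from the original
 * buffer sb0 in front of every segment in ptd->split_buffers and fix up the
 * per-segment outer headers. Returns the total number of outer-header bytes
 * added across all segments.
 */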
static_always_inline u16
tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
                                vnet_interface_per_thread_data_t * ptd,
                                vlib_buffer_t * sb0,
                                generic_header_offset_t * gho)
{
  u16 n_tx_bufs = vec_len (ptd->split_buffers);
  u16 i = 0, n_tx_bytes = 0;

  while (i < n_tx_bufs)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
      vnet_get_outer_header (b0, gho);
      clib_memcpy_fast (vlib_buffer_get_current (b0),
                        vlib_buffer_get_current (sb0), gho->outer_hdr_sz);

      tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
      n_tx_bytes += gho->outer_hdr_sz;
      i++;
    }
  return n_tx_bytes;
}

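/*
 * Allocate the buffers needed to hold the segments of a GSO packet into
 * ptd->split_buffers: one buffer for the (possibly shorter) first segment
 * plus one buffer per gso_size-d chunk of the remaining payload. Returns the
 * number of buffers allocated, or 0 (after freeing any partial allocation)
 * on failure.
 */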
static_always_inline u16
tso_alloc_tx_bufs (vlib_main_t * vm,
                   vnet_interface_per_thread_data_t * ptd,
                   vlib_buffer_t * b0, u32 n_bytes_b0, u16 l234_sz,
                   u16 gso_size, u16 first_data_size,
                   generic_header_offset_t * gho)
{
  u16 n_alloc, size;
  u16 first_packet_length = l234_sz + first_data_size;

  /*
   * size is the amount of data per segmented buffer, except for the 1st
   * segmented buffer.
   * l2_hdr_offset is an offset == current_data of the vlib_buffer_t.
   * l234_sz is the header size measured from l2_hdr_offset.
   */
  size =
    clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - l234_sz
              - gho->l2_hdr_offset);

  /*
   * The first segmented buffer's length is calculated separately, as it may
   * contain less data than gso_size (when gso_size is greater than the
   * current_length of the 1st buffer in the GSO buffer chain) and/or less
   * than the size calculated above.
   */
  u16 n_bufs = 1;

  /*
   * Remaining buffers: total packet length minus the first packet length
   * (which includes the l234 header), divided by size, rounded up.
   */
  ASSERT (n_bytes_b0 > first_packet_length);
  n_bufs += ((n_bytes_b0 - first_packet_length + (size - 1)) / size);

  vec_validate (ptd->split_buffers, n_bufs - 1);

  n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
  if (n_alloc < n_bufs)
    {
      vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
      return 0;
    }
  return n_alloc;
}

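/*
 * Initialize a freshly allocated segment buffer nb0 from the original buffer
 * b0: copy current_data, the opaque metadata, the trace handle and the first
 * 'length' bytes of packet data, and set the requested buffer flags.
 */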
static_always_inline void
tso_init_buf_from_template_base (vlib_buffer_t * nb0, vlib_buffer_t * b0,
                                 u32 flags, u16 length)
{
  nb0->current_data = b0->current_data;
  nb0->total_length_not_including_first_buffer = 0;
  nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
  nb0->trace_handle = b0->trace_handle;
  clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque));
  clib_memcpy_fast (vlib_buffer_get_current (nb0),
                    vlib_buffer_get_current (b0), length);
  nb0->current_length = length;
}

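/*
 * Prepare the next segment buffer nb0: copy the l234 header template from
 * b0, compute where the payload should be written (*p_dst_ptr) and how much
 * room is left for it (*p_dst_left), and stamp the TCP sequence number for
 * this segment.
 */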
static_always_inline void
tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0,
                            vlib_buffer_t * b0, u16 template_data_sz,
                            u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left,
                            u32 next_tcp_seq, u32 flags,
                            generic_header_offset_t * gho)
{
  tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz);

  *p_dst_left =
    clib_min (gso_size,
              vlib_buffer_get_default_data_size (vm) - (template_data_sz +
                                                        nb0->current_data));
  *p_dst_ptr = vlib_buffer_get_current (nb0) + template_data_sz;

  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (nb0) + gho->l4_hdr_offset);
  tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
}

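/*
 * Finalize the inner headers of a completed segment: set the TCP flags,
 * update the IPv4/IPv6 length field and, when do_csums is set (tunnelled
 * packets), compute the inner IP and TCP checksums in software and clear the
 * corresponding offload flags.
 */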
static_always_inline void
tso_fixup_segmented_buf (vlib_main_t * vm, vlib_buffer_t * b0, u8 tcp_flags,
                         int is_ip6, generic_header_offset_t * gho,
                         u32 do_csums)
{
  ip4_header_t *ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  ip6_header_t *ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset);

  tcp->flags = tcp_flags;

  if (is_ip6)
    {
      ip6->payload_length =
        clib_host_to_net_u16 (b0->current_length -
                              (gho->l4_hdr_offset - gho->l2_hdr_offset));
      if (do_csums && (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM))
        {
          int bogus = 0;
          tcp->checksum = 0;
          tcp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
          b0->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
        }
    }
  else
    {
      ip4->length =
        clib_host_to_net_u16 (b0->current_length -
                              (gho->l3_hdr_offset - gho->l2_hdr_offset));
      if (do_csums)
        {
          if (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
            ip4->checksum = ip4_header_checksum (ip4);
          if (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
            {
              tcp->checksum = 0;
              tcp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip4);
            }
          b0->flags &= ~VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
          b0->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
        }
    }
}

/**
 * Allocate the necessary number of ptd->split_buffers,
 * and segment the possibly chained buffer(s) from b0 into
 * them.
 *
 * Return the cumulative number of bytes sent, or zero
 * if allocation failed.
 */

static_always_inline u32
tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd,
                    u32 sbi0, vlib_buffer_t * sb0,
                    generic_header_offset_t * gho, u32 n_bytes_b0, int is_ip6,
                    u32 do_csums)
{
  u32 n_tx_bytes = 0;
  u16 gso_size = vnet_buffer2 (sb0)->gso_size;

  u8 save_tcp_flags = 0;
  u8 tcp_flags_no_fin_psh = 0;
  u32 next_tcp_seq = 0;

  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (sb0) + gho->l4_hdr_offset);
  next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
  /* store original flags for last packet and reset FIN and PSH */
  save_tcp_flags = tcp->flags;
  tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH);
  tcp->checksum = 0;

  u32 default_bflags =
    sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
  u16 l234_sz = gho->hdr_sz;
  int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz);
  next_tcp_seq += first_data_size;

  if (PREDICT_FALSE
      (!tso_alloc_tx_bufs
       (vm, ptd, sb0, n_bytes_b0, l234_sz, gso_size, first_data_size, gho)))
    return 0;

  vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]);
  tso_init_buf_from_template_base (b0, sb0, default_bflags,
                                   l234_sz + first_data_size);

  u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size;
  if (total_src_left)
    {
      /* Need to copy more segments */
      u8 *src_ptr, *dst_ptr;
      u16 src_left, dst_left;
      /* current source buffer */
      vlib_buffer_t *csb0 = sb0;
      u32 csbi0 = sbi0;
      /* current dest buffer */
      vlib_buffer_t *cdb0;
      u16 dbi = 1;              /* the buffer [0] is b0 */

      src_ptr = vlib_buffer_get_current (sb0) + l234_sz + first_data_size;
      src_left = sb0->current_length - l234_sz - first_data_size;

      tso_fixup_segmented_buf (vm, b0, tcp_flags_no_fin_psh, is_ip6, gho,
                               do_csums);

      /* grab a second buffer and prepare the loop */
      ASSERT (dbi < vec_len (ptd->split_buffers));
      cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
      tso_init_buf_from_template (vm, cdb0, b0, l234_sz, gso_size, &dst_ptr,
                                  &dst_left, next_tcp_seq, default_bflags,
                                  gho);

      /* an arbitrarily large number to catch runaway loops */
      int nloops = 2000;
      while (total_src_left)
        {
          if (nloops-- <= 0)
            clib_panic ("infinite loop detected");
          u16 bytes_to_copy = clib_min (src_left, dst_left);

          clib_memcpy_fast (dst_ptr, src_ptr, bytes_to_copy);

          src_left -= bytes_to_copy;
          src_ptr += bytes_to_copy;
          total_src_left -= bytes_to_copy;
          dst_left -= bytes_to_copy;
          dst_ptr += bytes_to_copy;
          next_tcp_seq += bytes_to_copy;
          cdb0->current_length += bytes_to_copy;

          if (0 == src_left)
            {
              int has_next = (csb0->flags & VLIB_BUFFER_NEXT_PRESENT);
              u32 next_bi = csb0->next_buffer;

              /* init src to the next buffer in chain */
              if (has_next)
                {
                  csbi0 = next_bi;
                  csb0 = vlib_get_buffer (vm, csbi0);
                  src_left = csb0->current_length;
                  src_ptr = vlib_buffer_get_current (csb0);
                }
              else
                {
                  ASSERT (total_src_left == 0);
                  break;
                }
            }
          if (0 == dst_left && total_src_left)
            {
              n_tx_bytes += cdb0->current_length;
              tso_fixup_segmented_buf (vm, cdb0, tcp_flags_no_fin_psh, is_ip6,
                                       gho, do_csums);
              ASSERT (dbi < vec_len (ptd->split_buffers));
              cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]);
              tso_init_buf_from_template (vm, cdb0, b0, l234_sz,
                                          gso_size, &dst_ptr, &dst_left,
                                          next_tcp_seq, default_bflags, gho);
            }
        }

      tso_fixup_segmented_buf (vm, cdb0, save_tcp_flags, is_ip6, gho,
                               do_csums);

      n_tx_bytes += cdb0->current_length;
    }
  n_tx_bytes += b0->current_length;
  return n_tx_bytes;
}

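/*
 * Drop a single buffer, bumping the per-interface TX error counter and
 * recording the given drop error code against this node.
 */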
static_always_inline void
drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm,
                           vlib_node_runtime_t * node, u32 * pbi0,
                           u32 sw_if_index, u32 drop_error_code)
{
  u32 thread_index = vm->thread_index;

  vlib_simple_counter_main_t *cm;
  cm =
    vec_elt_at_index (vnm->interface_main.sw_if_counters,
                      VNET_INTERFACE_COUNTER_TX_ERROR);
  vlib_increment_simple_counter (cm, thread_index, sw_if_index, 1);

  vlib_error_drop_buffers (vm, node, pbi0,
                           /* buffer stride */ 1,
                           /* n_buffers */ 1,
                           VNET_INTERFACE_OUTPUT_NEXT_DROP,
                           node->node_index, drop_error_code);
}

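/*
 * Main GSO node loop. When do_segmentation is 0, the quad loop passes
 * buffers through to the next feature untouched, breaking out to the scalar
 * loop as soon as a buffer needs software segmentation. The scalar loop
 * segments GSO buffers into gso_size-d pieces, rebuilds the outer headers
 * per segment for VXLAN-encapsulated packets, and enqueues the resulting
 * segment buffers in place of the original buffer.
 */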
static_always_inline uword
vnet_gso_node_inline (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame,
                      vnet_main_t * vnm,
                      vnet_hw_interface_t * hi,
                      int is_ip6, int do_segmentation)
{
  u32 *to_next;
  u32 next_index = node->cached_next_index;
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from = frame->n_vectors;
  u32 *from_end = from + n_left_from;
  u32 thread_index = vm->thread_index;
  vnet_interface_main_t *im = &vnm->interface_main;
  vnet_interface_per_thread_data_t *ptd =
    vec_elt_at_index (im->per_thread_data, thread_index);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;

  vlib_get_buffers (vm, from, b, n_left_from);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      if (!do_segmentation)
        while (from + 8 <= from_end && n_left_to_next >= 4)
          {
            u32 bi0, bi1, bi2, bi3;
            u32 next0, next1, next2, next3;
            u32 swif0, swif1, swif2, swif3;
            gso_trace_t *t0, *t1, *t2, *t3;
            vnet_hw_interface_t *hi0, *hi1, *hi2, *hi3;

            /* Prefetch next iteration. */
            vlib_prefetch_buffer_header (b[4], LOAD);
            vlib_prefetch_buffer_header (b[5], LOAD);
            vlib_prefetch_buffer_header (b[6], LOAD);
            vlib_prefetch_buffer_header (b[7], LOAD);

            bi0 = from[0];
            bi1 = from[1];
            bi2 = from[2];
            bi3 = from[3];
            to_next[0] = bi0;
            to_next[1] = bi1;
            to_next[2] = bi2;
            to_next[3] = bi3;

            swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
            swif1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
            swif2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
            swif3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];

            if (PREDICT_FALSE (hi->sw_if_index != swif0))
              {
                hi0 = vnet_get_sup_hw_interface (vnm, swif0);
                if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                    (b[0]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif1))
              {
                hi1 = vnet_get_sup_hw_interface (vnm, swif1);
                if (!(hi1->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) &&
                    (b[1]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif2))
              {
                hi2 = vnet_get_sup_hw_interface (vnm, swif2);
                if ((hi2->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                    (b[2]->flags & VNET_BUFFER_F_GSO))
                  break;
              }
            if (PREDICT_FALSE (hi->sw_if_index != swif3))
              {
                hi3 = vnet_get_sup_hw_interface (vnm, swif3);
                if (!(hi3->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) &&
                    (b[3]->flags & VNET_BUFFER_F_GSO))
                  break;
              }

            if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
                t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
                t0->gso_size = vnet_buffer2 (b[0])->gso_size;
                t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[0], &t0->gho);
              }
            if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t1 = vlib_add_trace (vm, node, b[1], sizeof (t1[0]));
                t1->flags = b[1]->flags & VNET_BUFFER_F_GSO;
                t1->gso_size = vnet_buffer2 (b[1])->gso_size;
                t1->gso_l4_hdr_sz = vnet_buffer2 (b[1])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[1], &t1->gho);
              }
            if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t2 = vlib_add_trace (vm, node, b[2], sizeof (t2[0]));
                t2->flags = b[2]->flags & VNET_BUFFER_F_GSO;
                t2->gso_size = vnet_buffer2 (b[2])->gso_size;
                t2->gso_l4_hdr_sz = vnet_buffer2 (b[2])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[2], &t2->gho);
              }
            if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
              {
                t3 = vlib_add_trace (vm, node, b[3], sizeof (t3[0]));
                t3->flags = b[3]->flags & VNET_BUFFER_F_GSO;
                t3->gso_size = vnet_buffer2 (b[3])->gso_size;
                t3->gso_l4_hdr_sz = vnet_buffer2 (b[3])->gso_l4_hdr_sz;
                vnet_generic_header_offset_parser (b[3], &t3->gho);
              }

            from += 4;
            to_next += 4;
            n_left_to_next -= 4;
            n_left_from -= 4;

            next0 = next1 = 0;
            next2 = next3 = 0;
            vnet_feature_next (&next0, b[0]);
            vnet_feature_next (&next1, b[1]);
            vnet_feature_next (&next2, b[2]);
            vnet_feature_next (&next3, b[3]);
            vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, bi1, bi2,
                                             bi3, next0, next1, next2, next3);
            b += 4;
          }

      while (from + 1 <= from_end && n_left_to_next > 0)
        {
          u32 bi0, swif0;
          gso_trace_t *t0;
          vnet_hw_interface_t *hi0;
          u32 next0 = 0;
          u32 do_segmentation0 = 0;
          u32 do_csums = 0;

          swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
          if (PREDICT_FALSE (hi->sw_if_index != swif0))
            {
              hi0 = vnet_get_sup_hw_interface (vnm, swif0);
              if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 &&
                  (b[0]->flags & VNET_BUFFER_F_GSO))
                do_segmentation0 = 1;
            }
          else
            do_segmentation0 = do_segmentation;

          /* speculatively enqueue b0 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next += 1;
          n_left_to_next -= 1;
          from += 1;
          n_left_from -= 1;

          if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
            {
              t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
              t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
              t0->gso_size = vnet_buffer2 (b[0])->gso_size;
              t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
              vnet_generic_header_offset_parser (b[0], &t0->gho);
            }

          if (do_segmentation0)
            {
              if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_GSO))
                {
                  /*
                   * Undo the enqueue of b0 - it is not going anywhere,
                   * and will be freed either after it is segmented or
                   * when dropped, if there are no buffers to segment into.
                   */
                  to_next -= 1;
                  n_left_to_next += 1;
                  /* undo the counting. */
                  generic_header_offset_t gho = { 0 };
                  u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
                  u32 n_tx_bytes = 0;

                  vnet_generic_header_offset_parser (b[0], &gho);

                  if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL))
                    {
                      if (PREDICT_FALSE
                          ((gho.gho_flags & GHO_F_VXLAN_TUNNEL) == 0))
                        {
                          /* not supported yet */
                          drop_one_buffer_and_count (vm, vnm, node, from - 1,
                                                     hi->sw_if_index,
                                                     VNET_INTERFACE_OUTPUT_ERROR_UNHANDLED_GSO_TYPE);
                          b += 1;
                          continue;
                        }

                      vnet_get_inner_header (b[0], &gho);

                      n_bytes_b0 -= gho.outer_hdr_sz;
                      /*
                       * In case of a tunnel-encapsulated packet, we will
                       * calculate the checksums for the segmented inner
                       * packets.
                       */
                      do_csums = 1;
                      is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0;
                    }

                  n_tx_bytes =
                    tso_segment_buffer (vm, ptd, bi0, b[0], &gho, n_bytes_b0,
                                        is_ip6, do_csums);

                  if (PREDICT_FALSE (n_tx_bytes == 0))
                    {
                      drop_one_buffer_and_count (vm, vnm, node, from - 1,
                                                 hi->sw_if_index,
                                                 VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO);
                      b += 1;
                      continue;
                    }

                  if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL))
                    {
                      vnet_get_outer_header (b[0], &gho);
                      n_tx_bytes +=
                        tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho);
                    }

                  u16 n_tx_bufs = vec_len (ptd->split_buffers);
                  u32 *from_seg = ptd->split_buffers;

                  while (n_tx_bufs > 0)
                    {
                      u32 sbi0;
                      vlib_buffer_t *sb0;
                      if (n_tx_bufs >= n_left_to_next)
                        {
                          while (n_left_to_next > 0)
                            {
                              sbi0 = to_next[0] = from_seg[0];
                              sb0 = vlib_get_buffer (vm, sbi0);
                              ASSERT (sb0->current_length > 0);
                              to_next += 1;
                              from_seg += 1;
                              n_left_to_next -= 1;
                              n_tx_bufs -= 1;
                              vnet_feature_next (&next0, sb0);
                              vlib_validate_buffer_enqueue_x1 (vm, node,
                                                               next_index,
                                                               to_next,
                                                               n_left_to_next,
                                                               sbi0, next0);
                            }
                          vlib_put_next_frame (vm, node, next_index,
                                               n_left_to_next);
                          vlib_get_new_next_frame (vm, node, next_index,
                                                   to_next, n_left_to_next);
                        }
                      while (n_tx_bufs > 0)
                        {
                          sbi0 = to_next[0] = from_seg[0];
                          sb0 = vlib_get_buffer (vm, sbi0);
                          ASSERT (sb0->current_length > 0);
                          to_next += 1;
                          from_seg += 1;
                          n_left_to_next -= 1;
                          n_tx_bufs -= 1;
                          vnet_feature_next (&next0, sb0);
                          vlib_validate_buffer_enqueue_x1 (vm, node,
                                                           next_index,
                                                           to_next,
                                                           n_left_to_next,
                                                           sbi0, next0);
                        }
                    }
                  /* The buffers were enqueued. Reset the length */
                  _vec_len (ptd->split_buffers) = 0;
                  /* Free the now segmented buffer */
                  vlib_buffer_free_one (vm, bi0);
                  b += 1;
                  continue;
                }
            }

          vnet_feature_next (&next0, b[0]);
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
          b += 1;
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

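/*
 * Per-frame dispatch: peek at the TX interface of the first buffer in the
 * frame. If its hardware supports GSO the node runs in pass-through mode
 * (do_segmentation = 0), otherwise software segmentation is enabled; the
 * per-buffer interface checks inside the node handle frames that mix
 * interfaces.
 */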
static_always_inline uword
vnet_gso_inline (vlib_main_t * vm,
                 vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_hw_interface_t *hi;

  if (frame->n_vectors > 0)
    {
      u32 *from = vlib_frame_vector_args (frame);
      vlib_buffer_t *b = vlib_get_buffer (vm, from[0]);
      hi = vnet_get_sup_hw_interface (vnm,
                                      vnet_buffer (b)->sw_if_index[VLIB_TX]);

      if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
        return vnet_gso_node_inline (vm, node, frame, vnm, hi,
                                     is_ip6, /* do_segmentation */ 0);
      else
        return vnet_gso_node_inline (vm, node, frame, vnm, hi,
                                     is_ip6, /* do_segmentation */ 1);
    }
  return 0;
}

VLIB_NODE_FN (gso_l2_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 0 /* ip6 */ );
}

VLIB_NODE_FN (gso_l2_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 1 /* ip6 */ );
}

VLIB_NODE_FN (gso_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 0 /* ip6 */ );
}

VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame)
{
  return vnet_gso_inline (vm, node, frame, 1 /* ip6 */ );
}

/* *INDENT-OFF* */

VLIB_REGISTER_NODE (gso_l2_ip4_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-l2-ip4",
};

VLIB_REGISTER_NODE (gso_l2_ip6_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-l2-ip6",
};

VLIB_REGISTER_NODE (gso_ip4_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-ip4",
};

VLIB_REGISTER_NODE (gso_ip6_node) = {
  .vector_size = sizeof (u32),
  .format_trace = format_gso_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = 0,
  .n_next_nodes = 0,
  .name = "gso-ip6",
};

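/*
 * Feature arc placement: the l2 variants sit on the l2-output-ip4/ip6 arcs
 * just before the arc end, while the ip4/ip6 variants sit on the
 * ip4-output/ip6-output arcs after the IPsec output features and before
 * interface-output.
 */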
VNET_FEATURE_INIT (gso_l2_ip4_node, static) = {
  .arc_name = "l2-output-ip4",
  .node_name = "gso-l2-ip4",
  .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};

VNET_FEATURE_INIT (gso_l2_ip6_node, static) = {
  .arc_name = "l2-output-ip6",
  .node_name = "gso-l2-ip6",
  .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};

VNET_FEATURE_INIT (gso_ip4_node, static) = {
  .arc_name = "ip4-output",
  .node_name = "gso-ip4",
  .runs_after = VNET_FEATURES ("ipsec4-output-feature"),
  .runs_before = VNET_FEATURES ("interface-output"),
};

VNET_FEATURE_INIT (gso_ip6_node, static) = {
  .arc_name = "ip6-output",
  .node_name = "gso-ip6",
  .runs_after = VNET_FEATURES ("ipsec6-output-feature"),
  .runs_before = VNET_FEATURES ("interface-output"),
};

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */