udp: fix csum computation when offload disabled
[vpp.git] / src / vnet / devices / virtio / node.c
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2016 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <net/if.h>
22 #ifdef __linux__
23 #include <linux/if_tun.h>
24 #elif __FreeBSD__
25 #include <net/if_tun.h>
26 #endif /* __linux */
27 #include <sys/ioctl.h>
28 #include <sys/eventfd.h>
29
30 #include <vlib/vlib.h>
31 #include <vlib/unix/unix.h>
32 #include <vnet/ethernet/ethernet.h>
33 #include <vnet/feature/feature.h>
34 #include <vnet/interface/rx_queue_funcs.h>
35 #include <vnet/ip/ip4_packet.h>
36 #include <vnet/ip/ip6_packet.h>
37 #include <vnet/udp/udp_packet.h>
38 #include <vnet/tcp/tcp_packet.h>
39 #include <vnet/devices/virtio/virtio.h>
40 #include <vnet/devices/virtio/virtio_inline.h>
41
/* Human-readable counter strings, expanded from the error list in virtio.h;
 * indexed by the VIRTIO_INPUT_ERROR_* enum generated from the same list. */
static char *virtio_input_error_strings[] = {
#define _(n, s) s,
  foreach_virtio_input_error
#undef _
};
47
/* Per-packet trace record captured on the RX path. */
typedef struct
{
  u32 next_index;		/* next node the buffer was enqueued to */
  u32 hw_if_index;		/* receiving hardware interface */
  u16 ring;			/* rx queue id (unused by current tracer) */
  u16 len;			/* total packet length incl. chained buffers */
  vnet_virtio_net_hdr_v1_t hdr;	/* copy of the virtio net header */
} virtio_input_trace_t;
56
57 static u8 *
58 format_virtio_input_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   virtio_input_trace_t *t = va_arg (*args, virtio_input_trace_t *);
63   u32 indent = format_get_indent (s);
64
65   s = format (s, "virtio: hw_if_index %d next-index %d vring %u len %u",
66               t->hw_if_index, t->next_index, t->ring, t->len);
67   s = format (s, "\n%Uhdr: flags 0x%02x gso_type 0x%02x hdr_len %u "
68               "gso_size %u csum_start %u csum_offset %u num_buffers %u",
69               format_white_space, indent + 2,
70               t->hdr.flags, t->hdr.gso_type, t->hdr.hdr_len, t->hdr.gso_size,
71               t->hdr.csum_start, t->hdr.csum_offset, t->hdr.num_buffers);
72   return s;
73 }
74
/*
 * If the virtio net header requests checksum completion
 * (VIRTIO_NET_HDR_F_NEEDS_CSUM), parse L2/L3 headers starting at b0->data
 * and translate the request into VPP buffer offload flags so a downstream
 * node computes the missing checksum(s).
 *
 * Outputs: *l4_proto receives the IP protocol and *l4_hdr_sz the L4 header
 * length (TCP options included); both are left untouched when the flag is
 * absent or the ethertype is neither IPv4 nor IPv6.
 */
static_always_inline void
virtio_needs_csum (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
		   u8 *l4_proto, u8 *l4_hdr_sz, virtio_if_type_t type)
{
  if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
    {
      u16 ethertype = 0, l2hdr_sz = 0;
      vnet_buffer_oflags_t oflags = 0;

      if (type == VIRTIO_IF_TYPE_TUN)
	{
	  /* tun is L3: no ethernet header, sniff the IP version nibble */
	  switch (b0->data[0] & 0xf0)
	    {
	    case 0x40:
	      ethertype = ETHERNET_TYPE_IP4;
	      break;
	    case 0x60:
	      ethertype = ETHERNET_TYPE_IP6;
	      break;
	    }
	}
      else
	{
	  ethernet_header_t *eh = (ethernet_header_t *) b0->data;
	  ethertype = clib_net_to_host_u16 (eh->type);
	  l2hdr_sz = sizeof (ethernet_header_t);

	  /* skip up to two VLAN tags (QinQ) to reach the real ethertype */
	  if (ethernet_frame_is_tagged (ethertype))
	    {
	      ethernet_vlan_header_t *vlan =
		(ethernet_vlan_header_t *) (eh + 1);

	      ethertype = clib_net_to_host_u16 (vlan->type);
	      l2hdr_sz += sizeof (*vlan);
	      if (ethertype == ETHERNET_TYPE_VLAN)
		{
		  vlan++;
		  ethertype = clib_net_to_host_u16 (vlan->type);
		  l2hdr_sz += sizeof (*vlan);
		}
	    }
	}

      vnet_buffer (b0)->l2_hdr_offset = 0;
      vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;

      if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
	{
	  ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
	  /* honor IHL: L4 starts after IPv4 options, if any */
	  vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
	  *l4_proto = ip4->protocol;
	  oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
	  b0->flags |=
	    (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
	     VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
	     VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
	}
      else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
	{
	  ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
	  /* fixed-size header only; no IPv6 checksum offload flag exists */
	  vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
	  /* FIXME IPv6 EH traversal */
	  *l4_proto = ip6->protocol;
	  b0->flags |= (VNET_BUFFER_F_IS_IP6 |
			VNET_BUFFER_F_L2_HDR_OFFSET_VALID
			| VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
			VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
	}
      /* request L4 checksum completion for TCP/UDP payloads */
      if (*l4_proto == IP_PROTOCOL_TCP)
	{
	  oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
	  tcp_header_t *tcp =
	    (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
	  *l4_hdr_sz = tcp_header_bytes (tcp);
	}
      else if (*l4_proto == IP_PROTOCOL_UDP)
	{
	  oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
	  *l4_hdr_sz = sizeof (udp_header_t);
	}
      if (oflags)
	vnet_buffer_offload_flags_set (b0, oflags);
    }
}
159
160 static_always_inline void
161 fill_gso_buffer_flags (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
162                        u8 l4_proto, u8 l4_hdr_sz)
163 {
164   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
165     {
166       ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
167       vnet_buffer2 (b0)->gso_size = hdr->gso_size;
168       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
169       b0->flags |= VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4;
170     }
171   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
172     {
173       ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
174       vnet_buffer2 (b0)->gso_size = hdr->gso_size;
175       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
176       b0->flags |= VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6;
177     }
178 }
179
180 static_always_inline u16
181 virtio_n_left_to_process (vnet_virtio_vring_t *vring, const int packed)
182 {
183   if (packed)
184     return vring->desc_in_use;
185   else
186     return vring->used->idx - vring->last_used_idx;
187 }
188
189 static_always_inline u16
190 virtio_get_slot_id (vnet_virtio_vring_t *vring, const int packed, u16 last,
191                     u16 mask)
192 {
193   if (packed)
194     return vring->packed_desc[last].id;
195   else
196     return vring->used->ring[last & mask].id;
197 }
198
199 static_always_inline u16
200 virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
201                 u16 last, u16 mask)
202 {
203   if (packed)
204     return vring->packed_desc[last].len - hdr_sz;
205   else
206     return vring->used->ring[last & mask].len - hdr_sz;
207 }
208
/* Packed-ring availability probe: if the descriptor at @last is not both
 * AVAIL and USED with the phase matching our used_wrap_counter, there is
 * nothing to process.  NOTE(review): non-hygienic macro — it writes the
 * caller's local `n_left` directly; only use where `n_left` is in scope. */
#define virtio_packed_check_n_left(vring, last)                               \
  do                                                                          \
    {                                                                         \
      vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];         \
      u16 flags = d->flags;                                                   \
      if ((flags & VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) ||  \
          (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15))    \
        {                                                                     \
          n_left = 0;                                                         \
        }                                                                     \
    }                                                                         \
  while (0)
221
/* Advance the ring cursor.  Packed rings wrap explicitly at queue_size and
 * flip the wrap counter; split rings rely on u16 arithmetic plus masking at
 * the access sites, so no wrap handling is needed here. */
#define increment_last(last, packed, vring)                                   \
  do                                                                          \
    {                                                                         \
      last++;                                                                 \
      if (packed && last >= vring->queue_size)                                \
        {                                                                     \
          last = 0;                                                           \
          vring->used_wrap_counter ^= 1;                                      \
        }                                                                     \
    }                                                                         \
  while (0)
233
234 static_always_inline void
235 virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node,
236                               const u32 next_index, const u32 sw_if_index,
237                               const u32 hw_if_index)
238 {
239   vlib_next_frame_t *nf;
240   vlib_frame_t *f;
241   ethernet_input_frame_t *ef;
242
243   if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
244     return;
245
246   nf = vlib_node_runtime_get_next_frame (
247     vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
248   f = vlib_get_frame (vm, nf->frame);
249   f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
250
251   ef = vlib_frame_scalar_args (f);
252   ef->sw_if_index = sw_if_index;
253   ef->hw_if_index = hw_if_index;
254   vlib_frame_no_append (f);
255 }
256
/*
 * Drain completed RX descriptors from @vring, rebuild vlib buffer chains,
 * apply checksum/GSO offload metadata and enqueue the packets to the next
 * node.  The trailing int parameters (gso_enabled,
 * checksum_offload_enabled, packed) are literal constants at every call
 * site, so the compiler emits one specialized variant per configuration.
 * Returns the number of packets enqueued.
 */
static_always_inline uword
virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                                vlib_frame_t *frame, virtio_if_t *vif,
                                vnet_virtio_vring_t *vring,
                                virtio_if_type_t type, int gso_enabled,
                                int checksum_offload_enabled, int packed)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 thread_index = vm->thread_index;
  uword n_trace = vlib_get_trace_count (vm, node);
  u32 next_index;
  const int hdr_sz = vif->virtio_net_hdr_sz;
  u32 *to_next = 0;
  u32 n_rx_packets = 0;
  u32 n_rx_bytes = 0;
  u16 mask = vring->queue_size - 1;	/* split ring indexing mask */
  u16 last = vring->last_used_idx;
  u16 n_left = virtio_n_left_to_process (vring, packed);
  vlib_buffer_t bt = {};	/* template carrying feature-arc metadata */

  if (packed)
    {
      /* zeroes n_left if the first packed descriptor is not ready */
      virtio_packed_check_n_left (vring, last);
    }

  if (n_left == 0)
    return 0;

  if (PREDICT_FALSE (n_left == vring->queue_size))
    {
      /*
       * Informational error logging when VPP is not pulling packets fast
       * enough.
       */
      vlib_error_count (vm, node->node_index, VIRTIO_INPUT_ERROR_FULL_RX_QUEUE,
                        1);
    }

  if (type == VIRTIO_IF_TYPE_TUN)
    {
      /* L3 interface: per-packet next chosen below from the IP version */
      next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
    }
  else
    {
      next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
      if (PREDICT_FALSE (vif->per_interface_next_index != ~0))
        next_index = vif->per_interface_next_index;

      /* only for l2, redirect if feature path enabled */
      vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt);
    }

  while (n_left)
    {
      u32 n_left_to_next;
      u32 next0 = next_index;

      vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left && n_left_to_next)
        {
          if (packed)
            {
              /* re-check availability: n_left may overestimate for packed */
              vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];
              u16 flags = d->flags;
              if ((flags & VRING_DESC_F_AVAIL) !=
                  (vring->used_wrap_counter << 7)
                  || (flags & VRING_DESC_F_USED) !=
                  (vring->used_wrap_counter << 15))
                {
                  n_left = 0;
                  break;
                }
            }
          u8 l4_proto = 0, l4_hdr_sz = 0;
          u16 num_buffers = 1;
          vnet_virtio_net_hdr_v1_t *hdr;
          u16 slot = virtio_get_slot_id (vring, packed, last, mask);
          u16 len = virtio_get_len (vring, packed, hdr_sz, last, mask);
          u32 bi0 = vring->buffers[slot];
          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
          /* virtio net header sits at current_data (before the packet) */
          hdr = vlib_buffer_get_current (b0);
          if (hdr_sz == sizeof (vnet_virtio_net_hdr_v1_t))
            num_buffers = hdr->num_buffers;

          b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
          b0->current_data = 0;
          b0->current_length = len;

          if (checksum_offload_enabled)
            virtio_needs_csum (b0, hdr, &l4_proto, &l4_hdr_sz, type);

          if (gso_enabled)
            fill_gso_buffer_flags (b0, hdr, l4_proto, l4_hdr_sz);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index;
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;

          /* if multisegment packet */
          if (PREDICT_FALSE (num_buffers > 1))
            {
              vlib_buffer_t *pb, *cb;
              pb = b0;
              b0->total_length_not_including_first_buffer = 0;
              while (num_buffers > 1)
                {
                  increment_last (last, packed, vring);
                  u16 cslot = virtio_get_slot_id (vring, packed, last, mask);
                  /* hdr size is 0 after 1st packet in chain buffers */
                  u16 clen = virtio_get_len (vring, packed, 0, last, mask);
                  u32 cbi = vring->buffers[cslot];
                  cb = vlib_get_buffer (vm, cbi);

                  /* current buffer */
                  cb->current_length = clen;

                  /* previous buffer */
                  pb->next_buffer = cbi;
                  pb->flags |= VLIB_BUFFER_NEXT_PRESENT;

                  /* first buffer */
                  b0->total_length_not_including_first_buffer += clen;

                  pb = cb;
                  vring->desc_in_use--;
                  num_buffers--;
                  n_left--;
                }
              len += b0->total_length_not_including_first_buffer;
            }

          if (type == VIRTIO_IF_TYPE_TUN)
            {
              /* pick the next node from the IP version nibble */
              switch (b0->data[0] & 0xf0)
                {
                case 0x40:
                  next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
                  break;
                case 0x60:
                  next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
                  break;
                default:
                  next0 = VNET_DEVICE_INPUT_NEXT_DROP;
                  break;
                }

              if (PREDICT_FALSE (vif->per_interface_next_index != ~0))
                next0 = vif->per_interface_next_index;
            }
          else
            {
              /* copy feature arc data from template */
              b0->current_config_index = bt.current_config_index;
              vnet_buffer (b0)->feature_arc_index =
                vnet_buffer (&bt)->feature_arc_index;
            }

          /* trace */
          if (PREDICT_FALSE (n_trace > 0 && vlib_trace_buffer (vm, node, next0, b0,     /* follow_chain */
                                                               1)))
            {
              virtio_input_trace_t *tr;
              vlib_set_trace_count (vm, node, --n_trace);
              tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->next_index = next0;
              tr->hw_if_index = vif->hw_if_index;
              tr->len = len;
              /* copy only as many header bytes as this device provides */
              clib_memcpy_fast (&tr->hdr, hdr, (hdr_sz == 12) ? 12 : 10);
            }

          /* enqueue buffer */
          to_next[0] = bi0;
          vring->desc_in_use--;
          to_next += 1;
          n_left_to_next--;
          n_left--;
          increment_last (last, packed, vring);

          /* only tun interfaces may have different next index */
          if (type == VIRTIO_IF_TYPE_TUN)
            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, next0);

          /* next packet */
          n_rx_packets++;
          n_rx_bytes += len;
        }
      virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index,
                                    vif->hw_if_index);
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vring->last_used_idx = last;

  vring->total_packets += n_rx_packets;
  vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
                                   + VNET_INTERFACE_COUNTER_RX, thread_index,
                                   vif->sw_if_index, n_rx_packets,
                                   n_rx_bytes);

  return n_rx_packets;
}
458
459 static_always_inline uword
460 virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
461                             vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
462                             virtio_if_type_t type)
463 {
464   vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
465   const int hdr_sz = vif->virtio_net_hdr_sz;
466   uword rv;
467
468   if (vif->is_packed)
469     {
470       if (vif->gso_enabled)
471         rv =
472           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
473                                           1, 1, 1);
474       else if (vif->csum_offload_enabled)
475         rv =
476           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
477                                           0, 1, 1);
478       else
479         rv =
480           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
481                                           0, 0, 1);
482
483       virtio_refill_vring_packed (vm, vif, type, vring, hdr_sz,
484                                   node->node_index);
485     }
486   else
487     {
488       if (vif->gso_enabled)
489         rv =
490           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
491                                           1, 1, 0);
492       else if (vif->csum_offload_enabled)
493         rv =
494           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
495                                           0, 1, 0);
496       else
497         rv =
498           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
499                                           0, 0, 0);
500
501       virtio_refill_vring_split (vm, vif, type, vring, hdr_sz,
502                                  node->node_index);
503     }
504   return rv;
505 }
506
507 VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
508                                   vlib_node_runtime_t * node,
509                                   vlib_frame_t * frame)
510 {
511   u32 n_rx = 0;
512   virtio_main_t *vim = &virtio_main;
513   vnet_hw_if_rxq_poll_vector_t *p,
514     *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
515
516   vec_foreach (p, pv)
517     {
518       virtio_if_t *vif;
519       vif = vec_elt_at_index (vim->interfaces, p->dev_instance);
520       if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
521         {
522           if (vif->type == VIRTIO_IF_TYPE_TAP)
523             n_rx += virtio_device_input_inline (
524               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TAP);
525           else if (vif->type == VIRTIO_IF_TYPE_PCI)
526             n_rx += virtio_device_input_inline (
527               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_PCI);
528           else if (vif->type == VIRTIO_IF_TYPE_TUN)
529             n_rx += virtio_device_input_inline (
530               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TUN);
531         }
532     }
533
534   return n_rx;
535 }
536
/* Node registration: interrupt-mode input node sharing next-node layout
 * with device-input. */
VLIB_REGISTER_NODE (virtio_input_node) = {
  .name = "virtio-input",
  .sibling_of = "device-input",
  .format_trace = format_virtio_input_trace,
  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_INTERRUPT,
  .n_errors = VIRTIO_INPUT_N_ERROR,
  .error_strings = virtio_input_error_strings,
};
547
548 /*
549  * fd.io coding-style-patch-verification: ON
550  *
551  * Local Variables:
552  * eval: (c-set-style "gnu")
553  * End:
554  */