session: remove ipv6 lookup threading assert
[vpp.git] / src / vnet / devices / virtio / node.c
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2016 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>
21 #include <net/if.h>
22 #ifdef __linux__
23 #include <linux/if_tun.h>
24 #elif __FreeBSD__
25 #include <net/if_tun.h>
26 #endif /* __linux */
27 #include <sys/ioctl.h>
28 #include <sys/eventfd.h>
29
30 #include <vlib/vlib.h>
31 #include <vlib/unix/unix.h>
32 #include <vnet/ethernet/ethernet.h>
33 #include <vnet/feature/feature.h>
34 #include <vnet/interface/rx_queue_funcs.h>
35 #include <vnet/ip/ip4_packet.h>
36 #include <vnet/ip/ip6_packet.h>
37 #include <vnet/udp/udp_packet.h>
38 #include <vnet/tcp/tcp_packet.h>
39 #include <vnet/devices/virtio/virtio.h>
40 #include <vnet/devices/virtio/virtio_inline.h>
41
/* Human-readable counter strings for the node's error counters; one entry
 * per error defined by foreach_virtio_input_error (second macro arg is the
 * description string). Order must match the error enum. */
static char *virtio_input_error_strings[] = {
#define _(n, s) s,
  foreach_virtio_input_error
#undef _
};
47
/* Per-packet trace record captured by virtio-input when tracing is on. */
typedef struct
{
  u32 next_index;	     /* next node index the packet was enqueued to */
  u32 hw_if_index;	     /* hardware interface the packet arrived on */
  u16 ring;		     /* rx queue (vring) id */
  u16 len;		     /* total packet length in bytes */
  vnet_virtio_net_hdr_v1_t hdr; /* copy of the virtio net header */
} virtio_input_trace_t;
56
57 static u8 *
58 format_virtio_input_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   virtio_input_trace_t *t = va_arg (*args, virtio_input_trace_t *);
63   u32 indent = format_get_indent (s);
64
65   s = format (s, "virtio: hw_if_index %d next-index %d vring %u len %u",
66               t->hw_if_index, t->next_index, t->ring, t->len);
67   s = format (s, "\n%Uhdr: flags 0x%02x gso_type 0x%02x hdr_len %u "
68               "gso_size %u csum_start %u csum_offset %u num_buffers %u",
69               format_white_space, indent + 2,
70               t->hdr.flags, t->hdr.gso_type, t->hdr.hdr_len, t->hdr.gso_size,
71               t->hdr.csum_start, t->hdr.csum_offset, t->hdr.num_buffers);
72   return s;
73 }
74
/*
 * Inspect the virtio net header of a received packet and, when the backend
 * asked us to complete the checksum (VIRTIO_NET_HDR_F_NEEDS_CSUM), set the
 * buffer's l2/l3/l4 header offsets plus checksum-offload flags so later
 * nodes can finish the checksums.
 *
 * Outputs via pointers: *l4_proto (IP protocol number) and *l4_hdr_sz
 * (TCP/UDP header length) — left untouched when no csum is requested or
 * the ethertype is not IPv4/IPv6.
 *
 * NOTE(review): header parsing reads b0->data directly, so it assumes the
 * l2/l3/l4 headers are contained in the first buffer of a chain — TODO
 * confirm against the refill path's buffer sizing.
 */
static_always_inline void
virtio_needs_csum (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
                   u8 *l4_proto, u8 *l4_hdr_sz, virtio_if_type_t type)
{
  if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
    {
      u16 ethertype = 0, l2hdr_sz = 0;
      vnet_buffer_oflags_t oflags = 0;

      if (type == VIRTIO_IF_TYPE_TUN)
        {
          /* TUN carries raw L3: classify by the IP version nibble. */
          switch (b0->data[0] & 0xf0)
            {
            case 0x40:
              ethertype = ETHERNET_TYPE_IP4;
              break;
            case 0x60:
              ethertype = ETHERNET_TYPE_IP6;
              break;
            }
        }
      else
        {
          /* TAP/PCI carry ethernet; peel up to two VLAN tags (QinQ). */
          ethernet_header_t *eh = (ethernet_header_t *) b0->data;
          ethertype = clib_net_to_host_u16 (eh->type);
          l2hdr_sz = sizeof (ethernet_header_t);

          if (ethernet_frame_is_tagged (ethertype))
            {
              ethernet_vlan_header_t *vlan =
                (ethernet_vlan_header_t *) (eh + 1);

              ethertype = clib_net_to_host_u16 (vlan->type);
              l2hdr_sz += sizeof (*vlan);
              if (ethertype == ETHERNET_TYPE_VLAN)
                {
                  vlan++;
                  ethertype = clib_net_to_host_u16 (vlan->type);
                  l2hdr_sz += sizeof (*vlan);
                }
            }
        }

      vnet_buffer (b0)->l2_hdr_offset = 0;
      vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;

      if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
        {
          ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
          vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
          *l4_proto = ip4->protocol;
          /* IPv4 header checksum is also recomputed downstream. */
          oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
          b0->flags |=
            (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
             VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
             VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
        }
      else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
        {
          ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
          vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
          /* FIXME IPv6 EH traversal */
          /* NOTE(review): without extension-header traversal, 'protocol'
             here may be an EH type rather than the final L4 protocol, in
             which case neither branch below fires and no L4 oflag is set. */
          *l4_proto = ip6->protocol;
          b0->flags |= (VNET_BUFFER_F_IS_IP6 |
                        VNET_BUFFER_F_L2_HDR_OFFSET_VALID
                        | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
                        VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
        }
      if (*l4_proto == IP_PROTOCOL_TCP)
        {
          oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
          tcp_header_t *tcp =
            (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
          *l4_hdr_sz = tcp_header_bytes (tcp);
        }
      else if (*l4_proto == IP_PROTOCOL_UDP)
        {
          oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
          *l4_hdr_sz = sizeof (udp_header_t);
        }
      if (oflags)
        vnet_buffer_offload_flags_set (b0, oflags);
    }
}
159
160 static_always_inline void
161 fill_gso_buffer_flags (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
162                        u8 l4_proto, u8 l4_hdr_sz)
163 {
164   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
165     {
166       ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
167       vnet_buffer2 (b0)->gso_size = hdr->gso_size;
168       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
169       b0->flags |= VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4;
170     }
171   if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
172     {
173       ASSERT (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM);
174       vnet_buffer2 (b0)->gso_size = hdr->gso_size;
175       vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
176       b0->flags |= VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6;
177     }
178 }
179
180 static_always_inline u16
181 virtio_n_left_to_process (vnet_virtio_vring_t *vring, const int packed)
182 {
183   if (packed)
184     return vring->desc_in_use;
185   else
186     return vring->used->idx - vring->last_used_idx;
187 }
188
189 static_always_inline u16
190 virtio_get_slot_id (vnet_virtio_vring_t *vring, const int packed, u16 last,
191                     u16 mask)
192 {
193   if (packed)
194     return vring->packed_desc[last].id;
195   else
196     return vring->used->ring[last & mask].id;
197 }
198
199 static_always_inline u16
200 virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
201                 u16 last, u16 mask)
202 {
203   if (packed)
204     return vring->packed_desc[last].len - hdr_sz;
205   else
206     return vring->used->ring[last & mask].len - hdr_sz;
207 }
208
/*
 * Packed-ring availability probe: if the descriptor at 'last' is not yet
 * used by the device (AVAIL/USED bits disagree with our wrap counter),
 * there is nothing to process.
 * NOTE: this macro expands in the caller's scope and writes the caller's
 * local variable 'n_left' — it must be in scope at the expansion site.
 */
#define virtio_packed_check_n_left(vring, last)                               \
  do                                                                          \
    {                                                                         \
      vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];         \
      u16 flags = d->flags;                                                   \
      if ((flags & VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) ||  \
          (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15))    \
        {                                                                     \
          n_left = 0;                                                         \
        }                                                                     \
    }                                                                         \
  while (0)
221
/*
 * Advance the ring cursor 'last' by one. Packed rings wrap explicitly at
 * queue_size and toggle the used wrap counter; split rings rely on natural
 * u16 wraparound plus masking at the access sites.
 */
#define increment_last(last, packed, vring)                                   \
  do                                                                          \
    {                                                                         \
      last++;                                                                 \
      if (packed && last >= vring->queue_size)                                \
        {                                                                     \
          last = 0;                                                           \
          vring->used_wrap_counter ^= 1;                                      \
        }                                                                     \
    }                                                                         \
  while (0)
233
234 static_always_inline void
235 virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node,
236                               const u32 next_index, const u32 sw_if_index,
237                               const u32 hw_if_index)
238 {
239   vlib_next_frame_t *nf;
240   vlib_frame_t *f;
241   ethernet_input_frame_t *ef;
242
243   if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
244     return;
245
246   nf = vlib_node_runtime_get_next_frame (
247     vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
248   f = vlib_get_frame (vm, nf->frame);
249   f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
250
251   ef = vlib_frame_scalar_args (f);
252   ef->sw_if_index = sw_if_index;
253   ef->hw_if_index = hw_if_index;
254   vlib_frame_no_append (f);
255 }
256
/*
 * Core receive loop, specialized at compile time by the constant flags
 * gso_enabled / checksum_offload_enabled / packed (callers pass literal
 * 0/1 so each combination is a distinct inlined variant).
 *
 * Drains used descriptors from the rx vring, turns them into vlib buffer
 * chains, applies checksum/GSO metadata, enqueues to the next node(s) and
 * updates rx counters. Returns the number of packets received.
 */
static_always_inline uword
virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                                vlib_frame_t *frame, virtio_if_t *vif,
                                vnet_virtio_vring_t *vring,
                                virtio_if_type_t type, int gso_enabled,
                                int checksum_offload_enabled, int packed)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 thread_index = vm->thread_index;
  uword n_trace = vlib_get_trace_count (vm, node);
  u32 next_index;
  const int hdr_sz = vif->virtio_net_hdr_sz;
  u32 *to_next = 0;
  u32 n_rx_packets = 0;
  u32 n_rx_bytes = 0;
  u16 mask = vring->queue_size - 1;	/* split-ring index mask; queue_size is a power of two */
  u16 last = vring->last_used_idx;
  u16 n_left = virtio_n_left_to_process (vring, packed);
  vlib_buffer_t bt = {};		/* template buffer for feature-arc state */

  if (packed)
    {
      /* may zero n_left if the descriptor at 'last' is not used yet */
      virtio_packed_check_n_left (vring, last);
    }

  if (n_left == 0)
    return 0;

  if (type == VIRTIO_IF_TYPE_TUN)
    {
      /* raw-IP interface: per-packet next chosen below from the IP version */
      next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
    }
  else
    {
      next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
      if (PREDICT_FALSE (vif->per_interface_next_index != ~0))
        next_index = vif->per_interface_next_index;

      /* only for l2, redirect if feature path enabled */
      vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt);
    }

  while (n_left)
    {
      u32 n_left_to_next;
      u32 next0 = next_index;

      vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left && n_left_to_next)
        {
          if (packed)
            {
              /* re-check availability: n_left was a snapshot, the device
                 may not have published all descriptors yet */
              vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];
              u16 flags = d->flags;
              if ((flags & VRING_DESC_F_AVAIL) !=
                  (vring->used_wrap_counter << 7)
                  || (flags & VRING_DESC_F_USED) !=
                  (vring->used_wrap_counter << 15))
                {
                  n_left = 0;
                  break;
                }
            }
          u8 l4_proto = 0, l4_hdr_sz = 0;
          u16 num_buffers = 1;
          vnet_virtio_net_hdr_v1_t *hdr;
          u16 slot = virtio_get_slot_id (vring, packed, last, mask);
          u16 len = virtio_get_len (vring, packed, hdr_sz, last, mask);
          u32 bi0 = vring->buffers[slot];
          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
          /* virtio net header sits at current_data, before the packet */
          hdr = vlib_buffer_get_current (b0);
          if (hdr_sz == sizeof (vnet_virtio_net_hdr_v1_t))
            num_buffers = hdr->num_buffers;

          b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
          b0->current_data = 0;
          b0->current_length = len;

          if (checksum_offload_enabled)
            virtio_needs_csum (b0, hdr, &l4_proto, &l4_hdr_sz, type);

          if (gso_enabled)
            fill_gso_buffer_flags (b0, hdr, l4_proto, l4_hdr_sz);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index;
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;

          /* if multisegment packet */
          if (PREDICT_FALSE (num_buffers > 1))
            {
              /* chain the continuation descriptors onto b0; each one
                 consumes a ring slot and a unit of n_left */
              vlib_buffer_t *pb, *cb;
              pb = b0;
              b0->total_length_not_including_first_buffer = 0;
              while (num_buffers > 1)
                {
                  increment_last (last, packed, vring);
                  u16 cslot = virtio_get_slot_id (vring, packed, last, mask);
                  /* hdr size is 0 after 1st packet in chain buffers */
                  u16 clen = virtio_get_len (vring, packed, 0, last, mask);
                  u32 cbi = vring->buffers[cslot];
                  cb = vlib_get_buffer (vm, cbi);

                  /* current buffer */
                  cb->current_length = clen;

                  /* previous buffer */
                  pb->next_buffer = cbi;
                  pb->flags |= VLIB_BUFFER_NEXT_PRESENT;

                  /* first buffer */
                  b0->total_length_not_including_first_buffer += clen;

                  pb = cb;
                  vring->desc_in_use--;
                  num_buffers--;
                  n_left--;
                }
              len += b0->total_length_not_including_first_buffer;
            }

          if (type == VIRTIO_IF_TYPE_TUN)
            {
              /* raw IP: steer by version nibble, drop anything else */
              switch (b0->data[0] & 0xf0)
                {
                case 0x40:
                  next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
                  break;
                case 0x60:
                  next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
                  break;
                default:
                  next0 = VNET_DEVICE_INPUT_NEXT_DROP;
                  break;
                }

              if (PREDICT_FALSE (vif->per_interface_next_index != ~0))
                next0 = vif->per_interface_next_index;
            }
          else
            {
              /* copy feature arc data from template */
              b0->current_config_index = bt.current_config_index;
              vnet_buffer (b0)->feature_arc_index =
                vnet_buffer (&bt)->feature_arc_index;
            }

          /* trace */
          if (PREDICT_FALSE (n_trace > 0 && vlib_trace_buffer (vm, node, next0, b0,     /* follow_chain */
                                                               1)))
            {
              virtio_input_trace_t *tr;
              vlib_set_trace_count (vm, node, --n_trace);
              tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->next_index = next0;
              tr->hw_if_index = vif->hw_if_index;
              tr->len = len;
              /* copy only the bytes the negotiated header size provides */
              clib_memcpy_fast (&tr->hdr, hdr, (hdr_sz == 12) ? 12 : 10);
            }

          /* enqueue buffer */
          to_next[0] = bi0;
          vring->desc_in_use--;
          to_next += 1;
          n_left_to_next--;
          n_left--;
          increment_last (last, packed, vring);

          /* only tun interfaces may have different next index */
          if (type == VIRTIO_IF_TYPE_TUN)
            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, next0);

          /* next packet */
          n_rx_packets++;
          n_rx_bytes += len;
        }
      virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index,
                                    vif->hw_if_index);
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vring->last_used_idx = last;

  vring->total_packets += n_rx_packets;
  vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
                                   + VNET_INTERFACE_COUNTER_RX, thread_index,
                                   vif->sw_if_index, n_rx_packets,
                                   n_rx_bytes);

  return n_rx_packets;
}
448
449 static_always_inline uword
450 virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
451                             vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
452                             virtio_if_type_t type)
453 {
454   vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
455   const int hdr_sz = vif->virtio_net_hdr_sz;
456   uword rv;
457
458   if (vif->is_packed)
459     {
460       if (vif->gso_enabled)
461         rv =
462           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
463                                           1, 1, 1);
464       else if (vif->csum_offload_enabled)
465         rv =
466           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
467                                           0, 1, 1);
468       else
469         rv =
470           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
471                                           0, 0, 1);
472
473       virtio_refill_vring_packed (vm, vif, type, vring, hdr_sz,
474                                   node->node_index);
475     }
476   else
477     {
478       if (vif->gso_enabled)
479         rv =
480           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
481                                           1, 1, 0);
482       else if (vif->csum_offload_enabled)
483         rv =
484           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
485                                           0, 1, 0);
486       else
487         rv =
488           virtio_device_input_gso_inline (vm, node, frame, vif, vring, type,
489                                           0, 0, 0);
490
491       virtio_refill_vring_split (vm, vif, type, vring, hdr_sz,
492                                  node->node_index);
493     }
494   return rv;
495 }
496
497 VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
498                                   vlib_node_runtime_t * node,
499                                   vlib_frame_t * frame)
500 {
501   u32 n_rx = 0;
502   virtio_main_t *vim = &virtio_main;
503   vnet_hw_if_rxq_poll_vector_t *p,
504     *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
505
506   vec_foreach (p, pv)
507     {
508       virtio_if_t *vif;
509       vif = vec_elt_at_index (vim->interfaces, p->dev_instance);
510       if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
511         {
512           if (vif->type == VIRTIO_IF_TYPE_TAP)
513             n_rx += virtio_device_input_inline (
514               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TAP);
515           else if (vif->type == VIRTIO_IF_TYPE_PCI)
516             n_rx += virtio_device_input_inline (
517               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_PCI);
518           else if (vif->type == VIRTIO_IF_TYPE_TUN)
519             n_rx += virtio_device_input_inline (
520               vm, node, frame, vif, p->queue_id, VIRTIO_IF_TYPE_TUN);
521         }
522     }
523
524   return n_rx;
525 }
526
/* Node registration: interrupt-mode input node sharing next-node layout
 * with device-input so the standard device next indices apply. */
VLIB_REGISTER_NODE (virtio_input_node) = {
  .name = "virtio-input",
  .sibling_of = "device-input",
  .format_trace = format_virtio_input_trace,
  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_INTERRUPT,
  .n_errors = VIRTIO_INPUT_N_ERROR,
  .error_strings = virtio_input_error_strings,
};
537
538 /*
539  * fd.io coding-style-patch-verification: ON
540  *
541  * Local Variables:
542  * eval: (c-set-style "gnu")
543  * End:
544  */