vpp.git: plugins/flowperpkt-plugin/flowperpkt/node.c
/*
 * node.c - ipv4 ipfix-per-packet graph node
 *
 * Copyright (c) <current-year> <your-organization>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <flowperpkt/flowperpkt.h>

/**
 * @file ipv4 flow record generator graph node
 */

typedef struct
{
  /** rx/tx interface handles */
  u32 rx_sw_if_index;
  u32 tx_sw_if_index;
  /** ip4 source and destination addresses */
  u32 src_address;
  u32 dst_address;
  /** ToS bits */
  u8 tos;
  /** packet timestamp */
  u64 timestamp;
  /** size of the buffer */
  u16 buffer_size;
} flowperpkt_ipv4_trace_t;

/* packet trace format function */
static u8 *
format_flowperpkt_ipv4_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  flowperpkt_ipv4_trace_t *t = va_arg (*args, flowperpkt_ipv4_trace_t *);

  s = format (s,
              "FLOWPERPKT-V4: rx_sw_if_index %d, tx_sw_if_index %d, src %U dst %U tos %02x, timestamp %lld, size %d",
              t->rx_sw_if_index, t->tx_sw_if_index,
              format_ip4_address, &t->src_address,
              format_ip4_address, &t->dst_address,
              t->tos, t->timestamp, t->buffer_size);
  return s;
}

vlib_node_registration_t flowperpkt_ipv4_node;

/* No counters at the moment */
#define foreach_flowperpkt_ipv4_error

typedef enum
{
#define _(sym,str) FLOWPERPKT_ERROR_##sym,
  foreach_flowperpkt_ipv4_error
#undef _
    FLOWPERPKT_N_ERROR,
} flowperpkt_ipv4_error_t;

static char *flowperpkt_ipv4_error_strings[] = {
#define _(sym,string) string,
  foreach_flowperpkt_ipv4_error
#undef _
};

typedef enum
{
  FLOWPERPKT_IPV4_NEXT_DROP,
  FLOWPERPKT_IPV4_NEXT_LOOKUP,
  FLOWPERPKT_IPV4_N_NEXT,
} flowperpkt_ipv4_next_t;

/**
 * @brief add an entry to the flow record under construction
 * @param vm vlib_main_t * current worker thread main structure pointer
 * @param node vlib_node_runtime_t * runtime data for this node
 * @param fm flowperpkt_main_t * flow-per-packet main structure pointer
 * @param rx_sw_if_index u32 ingress interface handle
 * @param tx_sw_if_index u32 egress interface handle
 * @param src_address u32 ip4 source address from the packet
 * @param dst_address u32 ip4 destination address from the packet
 * @param tos u8 ToS bits from the packet
 * @param timestamp u64 timestamp, nanoseconds since 1/1/70
 * @param length u16 ip length of the packet
 * @param do_flush int 1 = flush all cached records, 0 = construct a record
 */

static inline void
add_to_flow_record_ipv4 (vlib_main_t * vm,
                         vlib_node_runtime_t * node,
                         flowperpkt_main_t * fm,
                         u32 rx_sw_if_index, u32 tx_sw_if_index,
                         u32 src_address, u32 dst_address,
                         u8 tos, u64 timestamp, u16 length, int do_flush)
{
  u32 my_cpu_number = vm->cpu_index;
  flow_report_main_t *frm = &flow_report_main;
  ip4_header_t *ip;
  udp_header_t *udp;
  ip4_ipfix_template_packet_t *tp;
  ipfix_message_header_t *h;
  ipfix_set_header_t *s;
  vlib_frame_t *f;
  vlib_buffer_t *b0;
  u16 offset;
  u32 bi0;
  vlib_buffer_free_list_t *fl;

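  /*
   * Batching strategy: each worker thread keeps at most one partially
   * filled IPFIX packet under construction (ipv4_buffers_per_worker) and
   * one frame already aimed at ip4-lookup (ipv4_frames_per_worker).
   * Records are appended to the cached packet until it would exceed the
   * exporter path MTU, or until an explicit flush is requested.
   */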
  /* Find or allocate a buffer */
  b0 = fm->ipv4_buffers_per_worker[my_cpu_number];

  /* Need to allocate a buffer? */
  if (PREDICT_FALSE (b0 == 0))
    {
      /* Nothing to flush */
      if (do_flush)
        return;

      /* $$$$ drop counter? */
      if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
        return;

      /* Initialize the buffer */
      b0 = fm->ipv4_buffers_per_worker[my_cpu_number] =
        vlib_get_buffer (vm, bi0);
      fl =
        vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
      vlib_buffer_init_for_free_list (b0, fl);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
      offset = 0;
    }
  else
    {
      /* use the current buffer */
      bi0 = vlib_get_buffer_index (vm, b0);
      offset = fm->ipv4_next_record_offset_per_worker[my_cpu_number];
    }

  /* Find or allocate a frame */
  f = fm->ipv4_frames_per_worker[my_cpu_number];
  if (PREDICT_FALSE (f == 0))
    {
      u32 *to_next;
      f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
      fm->ipv4_frames_per_worker[my_cpu_number] = f;

      /* Enqueue the buffer */
      to_next = vlib_frame_vector_args (f);
      to_next[0] = bi0;
      f->n_vectors = 1;
    }

  /* Fresh packet, construct header */
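  /*
   * IP, UDP, IPFIX message and set headers are stacked at the start of the
   * buffer.  Their length, checksum and set-length fields are finalized by
   * the flush path below once the final size of the packet is known.
   */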
  if (PREDICT_FALSE (offset == 0))
    {
      flow_report_stream_t *stream;

      stream = &frm->streams[0];

      b0->current_data = 0;
      b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
        sizeof (*s);
      b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_FLOW_REPORT);
      vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;

      tp = vlib_buffer_get_current (b0);
      ip = (ip4_header_t *) & tp->ip4;
      udp = (udp_header_t *) (ip + 1);
      h = (ipfix_message_header_t *) (udp + 1);
      s = (ipfix_set_header_t *) (h + 1);

      ip->ip_version_and_header_length = 0x45;
      ip->ttl = 254;
      ip->protocol = IP_PROTOCOL_UDP;
      ip->flags_and_fragment_offset = 0;
      ip->src_address.as_u32 = frm->src_address.as_u32;
      ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
      udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
      udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
      udp->checksum = 0;

      /* FIXUP: message header export_time */
      h->export_time = (u32)
        (((f64) frm->unix_time_0) +
         (vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
      h->export_time = clib_host_to_net_u32 (h->export_time);
      h->domain_id = clib_host_to_net_u32 (stream->domain_id);

      /* FIXUP: message header sequence_number */
      h->sequence_number = stream->sequence_number++;
      h->sequence_number = clib_host_to_net_u32 (h->sequence_number);

      offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
    }

  /* Add data, unless we're flushing stale data */
  if (PREDICT_TRUE (do_flush == 0))
    {

      /* Add data */
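      /*
       * One data record is appended per packet: rx/tx interface indices,
       * source and destination addresses, ToS, timestamp and packet
       * length, 27 octets in all.
       */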
      /* Ingress interface */
      {
        u32 ingress_interface = clib_host_to_net_u32 (rx_sw_if_index);
        clib_memcpy (b0->data + offset, &ingress_interface,
                     sizeof (ingress_interface));
        offset += sizeof (ingress_interface);
      }
      /* Egress interface */
      {
        u32 egress_interface = clib_host_to_net_u32 (tx_sw_if_index);
        clib_memcpy (b0->data + offset, &egress_interface,
                     sizeof (egress_interface));
        offset += sizeof (egress_interface);
      }
      /* ip4 src address */
      {
        clib_memcpy (b0->data + offset, &src_address, sizeof (src_address));
        offset += sizeof (src_address);
      }
      /* ip4 dst address */
      {
        clib_memcpy (b0->data + offset, &dst_address, sizeof (dst_address));
        offset += sizeof (dst_address);
      }

      /* ToS */
      b0->data[offset++] = tos;

      /* Timestamp */
      clib_memcpy (b0->data + offset, &timestamp, sizeof (f64));
      offset += sizeof (f64);

      /* pkt size */
      {
        u16 pkt_size = clib_host_to_net_u16 (length);
        clib_memcpy (b0->data + offset, &pkt_size, sizeof (pkt_size));
        offset += sizeof (pkt_size);
      }


      b0->current_length +=
        /* 2 x sw_if_index + src + dst + tos + timestamp + length = 27 */
        4 * sizeof (u32) + sizeof (u8) + sizeof (f64) + sizeof (u16);

    }
  /* Time to flush the buffer? */
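  /*
   * The packet is sent when a flush is requested, or when appending one
   * more maximum-size record would push it past the exporter path MTU.
   * At that point the IPFIX set and message lengths, the IP/UDP lengths
   * and the checksums are finalized and the cached frame is handed to
   * ip4-lookup.
   */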
  if (PREDICT_FALSE
      (do_flush || (offset + 4 * sizeof (u32) + sizeof (u8)
                    + sizeof (f64) + sizeof (u16)) > frm->path_mtu))
    {
      tp = vlib_buffer_get_current (b0);
      ip = (ip4_header_t *) & tp->ip4;
      udp = (udp_header_t *) (ip + 1);
      h = (ipfix_message_header_t *) (udp + 1);
      s = (ipfix_set_header_t *) (h + 1);

      s->set_id_length = ipfix_set_id_length (fm->ipv4_report_id,
                                              b0->current_length -
                                              (sizeof (*ip) + sizeof (*udp) +
                                               sizeof (*h)));
      h->version_length = version_length (b0->current_length -
                                          (sizeof (*ip) + sizeof (*udp)));

      ip->length = clib_host_to_net_u16 (b0->current_length);

      ip->checksum = ip4_header_checksum (ip);
      udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));

      if (frm->udp_checksum)
        {
          /* RFC 7011 section 10.3.2. */
          udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
          if (udp->checksum == 0)
            udp->checksum = 0xffff;
        }

      ASSERT (ip->checksum == ip4_header_checksum (ip));

      if (PREDICT_FALSE (vlib_get_trace_count (vm, node) > 0))
        {
          vlib_trace_buffer (vm, node, FLOWPERPKT_IPV4_NEXT_LOOKUP, b0,
                             0 /* follow chain */ );
          flowperpkt_ipv4_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          t->tx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
          t->src_address = 0;
          t->dst_address = 0;
          t->tos = 0;
          t->timestamp = 0;
          t->buffer_size = b0->current_length;
        }

      vlib_put_frame_to_node (vm, ip4_lookup_node.index,
                              fm->ipv4_frames_per_worker[my_cpu_number]);
      fm->ipv4_frames_per_worker[my_cpu_number] = 0;
      fm->ipv4_buffers_per_worker[my_cpu_number] = 0;
      offset = 0;
    }

  fm->ipv4_next_record_offset_per_worker[my_cpu_number] = offset;
}

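/**
 * @brief flush any partially filled IPFIX packet for the current thread
 *
 * Invokes add_to_flow_record_ipv4 in flush mode so that a packet which
 * has not yet reached the path MTU is still finalized and sent.
 */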
void
flowperpkt_flush_callback_ipv4 (void)
{
  vlib_main_t *vm = vlib_get_main ();
  flowperpkt_main_t *fm = &flowperpkt_main;
  vlib_node_runtime_t *node;
  node = vlib_node_get_runtime (vm, flowperpkt_ipv4_node.index);

  add_to_flow_record_ipv4 (vm, node, fm, 0 /* rx_sw_if_index */ ,
                           0 /* tx_sw_if_index */ ,
                           0 /* src_address */ ,
                           0 /* dst_address */ ,
                           0 /* ToS */ ,
                           0ULL /* timestamp */ ,
                           0 /* length */ ,
                           1 /* do_flush */ );
}


static uword
flowperpkt_ipv4_node_fn (vlib_main_t * vm,
                         vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  flowperpkt_ipv4_next_t next_index;
  flowperpkt_main_t *fm = &flowperpkt_main;
  u64 now;

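  /*
   * Compute a nanosecond timestamp for this frame: elapsed vlib time since
   * fm->vlib_time_0, scaled to nanoseconds and added to the plugin's
   * nanosecond time base fm->nanosecond_time_0.
   */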
  now = (u64) ((vlib_time_now (vm) - fm->vlib_time_0) * 1e9);
  now += fm->nanosecond_time_0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

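      /*
       * Dual loop: handle two packets per iteration and prefetch the
       * buffer headers and first data cache line for the following pair;
       * leftovers are handled by the single-packet loop below.
       */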
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 next0 = FLOWPERPKT_IPV4_NEXT_DROP;
          u32 next1 = FLOWPERPKT_IPV4_NEXT_DROP;
          ip4_header_t *ip0, *ip1;
          u16 len0, len1;
          u32 bi0, bi1;
          vlib_buffer_t *b0, *b1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_TX],
                             &next0, b0);
          vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_TX],
                             &next1, b1);

          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
                                  vnet_buffer (b0)->ip.save_rewrite_length);

          len0 = vlib_buffer_length_in_chain (vm, b0);

          if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
            add_to_flow_record_ipv4 (vm, node, fm,
                                     vnet_buffer (b0)->sw_if_index[VLIB_RX],
                                     vnet_buffer (b0)->sw_if_index[VLIB_TX],
                                     ip0->src_address.as_u32,
                                     ip0->dst_address.as_u32,
                                     ip0->tos, now, len0, 0 /* flush */ );

          ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
                                  vnet_buffer (b1)->ip.save_rewrite_length);
          len1 = vlib_buffer_length_in_chain (vm, b1);

          if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
            add_to_flow_record_ipv4 (vm, node, fm,
                                     vnet_buffer (b1)->sw_if_index[VLIB_RX],
                                     vnet_buffer (b1)->sw_if_index[VLIB_TX],
                                     ip1->src_address.as_u32,
                                     ip1->dst_address.as_u32,
                                     ip1->tos, now, len1, 0 /* flush */ );

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  flowperpkt_ipv4_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
                  t->tx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
                  t->src_address = ip0->src_address.as_u32;
                  t->dst_address = ip0->dst_address.as_u32;
                  t->tos = ip0->tos;
                  t->timestamp = now;
                  t->buffer_size = len0;
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  flowperpkt_ipv4_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->rx_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX];
                  t->tx_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
                  t->src_address = ip1->src_address.as_u32;
                  t->dst_address = ip1->dst_address.as_u32;
                  t->tos = ip1->tos;
                  t->timestamp = now;
                  t->buffer_size = len1;
                }
            }

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0 = FLOWPERPKT_IPV4_NEXT_DROP;
          ip4_header_t *ip0;
          u16 len0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_TX],
                             &next0, b0);

          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
                                  vnet_buffer (b0)->ip.save_rewrite_length);
          /*
           * Fields recorded per packet:
           * ingressInterface, TLV type 10, u32
           * egressInterface, TLV type 14, u32
           * sourceIPv4Address, TLV type 8, u32
           * destinationIPv4Address, TLV type 12, u32
           * ipClassOfService, TLV type 5, u8
           * flowStartNanoseconds, TLV type 156, dateTimeNanoseconds (f64)
           *   Implementation: f64 nanoseconds since VPP started
           * dataLinkFrameSize, TLV type 312, u16
           */
          len0 = vlib_buffer_length_in_chain (vm, b0);

          if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_FLOW_REPORT) == 0))
            add_to_flow_record_ipv4 (vm, node, fm,
                                     vnet_buffer (b0)->sw_if_index[VLIB_RX],
                                     vnet_buffer (b0)->sw_if_index[VLIB_TX],
                                     ip0->src_address.as_u32,
                                     ip0->dst_address.as_u32,
                                     ip0->tos, now, len0, 0 /* flush */ );

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              flowperpkt_ipv4_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->rx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
              t->tx_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
              t->src_address = ip0->src_address.as_u32;
              t->dst_address = ip0->dst_address.as_u32;
              t->tos = ip0->tos;
              t->timestamp = now;
              t->buffer_size = len0;
            }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  return frame->n_vectors;
}

/**
 * @brief IPFIX ipv4 flow-per-packet graph node
 * @node flowperpkt-ipv4
 *
 * This is the IPFIX flow-record-per-packet node.
 *
 * @param vm    vlib_main_t corresponding to the current thread.
 * @param node  vlib_node_runtime_t data for this node.
 * @param frame vlib_frame_t whose contents should be dispatched.
 *
 * @par Graph mechanics: buffer metadata, next index usage
 *
 * <em>Uses:</em>
 * - <code>vnet_buffer(b)->ip.save_rewrite_length</code>
 *     - tells the node the length of the rewrite which was applied in
 *       ip4/6_rewrite_inline, allows the code to find the IP header without
 *       having to parse L2 headers, or make stupid assumptions about their
 *       length.
 * - <code>b->flags & VLIB_BUFFER_FLOW_REPORT</code>
 *     - Used to suppress flow record generation for flow record packets.
 *
 * <em>Sets:</em>
 * - <code>b->flags & VLIB_BUFFER_FLOW_REPORT</code>
 *     - Set on generated flow record packets, so that they do not
 *       themselves trigger flow record generation.
 *
 * <em>Next Index:</em>
 * - Next configured output feature on the interface, usually
 *   "interface-output." Generated flow records head for ip4-lookup.
 */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flowperpkt_ipv4_node) = {
  .function = flowperpkt_ipv4_node_fn,
  .name = "flowperpkt-ipv4",
  .vector_size = sizeof (u32),
  .format_trace = format_flowperpkt_ipv4_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = ARRAY_LEN(flowperpkt_ipv4_error_strings),
  .error_strings = flowperpkt_ipv4_error_strings,

  .n_next_nodes = FLOWPERPKT_IPV4_N_NEXT,

  /* edit / add dispositions here */
  .next_nodes = {
    [FLOWPERPKT_IPV4_NEXT_DROP] = "error-drop",
    /* Used only to trace ipfix data packets */
    [FLOWPERPKT_IPV4_NEXT_LOOKUP] = "ip4-lookup",
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */