Flowprobe: Stateful flows and IPv6, L4 recording
[vpp.git] / src / vnet / flow / flow_report_classify.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/flow/flow_report.h>
16 #include <vnet/flow/flow_report_classify.h>
17 #include <vnet/api_errno.h>
18
19 /* Common prefix of tcp and udp headers
20  * containing only source and destination port fields */
21 typedef struct {
22   u16 src_port, dst_port;
23 } tcpudp_header_t;
24
25 flow_report_classify_main_t flow_report_classify_main;
26
27 u8 * ipfix_classify_template_rewrite (flow_report_main_t * frm,
28                                       flow_report_t * fr,
29                                       ip4_address_t * collector_address,
30                                       ip4_address_t * src_address,
31                                       u16 collector_port)
32 {
33   flow_report_classify_main_t * fcm = &flow_report_classify_main;
34   vnet_classify_table_t * tblp;
35   vnet_classify_main_t * vcm = &vnet_classify_main;
36   u32 flow_table_index = fr->opaque.as_uword;
37   u8 * ip_start;
38   ip4_header_t * ip;
39   ip6_header_t * ip6;
40   tcpudp_header_t * tcpudp;
41   udp_header_t * udp;
42   ipfix_message_header_t * h;
43   ipfix_set_header_t * s;
44   ipfix_template_header_t * t;
45   ipfix_field_specifier_t * f;
46   ipfix_field_specifier_t * first_field;
47   u8 * rewrite = 0;
48   ip4_ipfix_template_packet_t * tp;
49   u32 field_count = 0;
50   u32 field_index = 0;
51   flow_report_stream_t * stream;
52   u8 ip_version;
53   u8 transport_protocol;
54   u8 * virt_mask;
55   u8 * real_mask;
56
57   stream = &frm->streams[fr->stream_index];
58
59   ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
60
61   ip_version = table->ip_version;
62   transport_protocol = table->transport_protocol;
63
64   tblp = pool_elt_at_index (vcm->tables, table->classify_table_index);
65
66   virt_mask = (u8 *)(tblp->mask - tblp->skip_n_vectors);
67   real_mask = (u8 *)(tblp->mask);
68
69   /* Determine field count */
70   ip_start = virt_mask + sizeof(ethernet_header_t);
71 #define _(field,mask,item,length)                                             \
72   if (((u8 *)&field >= real_mask) && (memcmp(&field, &mask, length) == 0))    \
73     {                                                                         \
74       field_count++;                                                          \
75                                                                               \
76       fr->fields_to_send = clib_bitmap_set (fr->fields_to_send,               \
77                                             field_index, 1);                  \
78     }                                                                         \
79   field_index++;
80   foreach_ipfix_field;
81 #undef _
82
83   /* Add packetTotalCount manually */
84   field_count += 1;
85
86   /* $$$ enterprise fields, at some later date */
87
88   /* allocate rewrite space */
89   vec_validate_aligned (rewrite, 
90                         sizeof (ip4_ipfix_template_packet_t) 
91                         + field_count * sizeof (ipfix_field_specifier_t) - 1,
92                         CLIB_CACHE_LINE_BYTES);
93
94   tp = (ip4_ipfix_template_packet_t *) rewrite;
95   ip = (ip4_header_t *) &tp->ip4;
96   udp = (udp_header_t *) (ip+1);
97   h = (ipfix_message_header_t *)(udp+1);
98   s = (ipfix_set_header_t *)(h+1);
99   t = (ipfix_template_header_t *)(s+1);
100   first_field = f = (ipfix_field_specifier_t *)(t+1);
101
102   ip->ip_version_and_header_length = 0x45;
103   ip->ttl = 254;
104   ip->protocol = IP_PROTOCOL_UDP;
105   ip->src_address.as_u32 = src_address->as_u32;
106   ip->dst_address.as_u32 = collector_address->as_u32;
107   udp->src_port = clib_host_to_net_u16 (stream->src_port);
108   udp->dst_port = clib_host_to_net_u16 (collector_port);
109   udp->length = clib_host_to_net_u16 (vec_len(rewrite) - sizeof (*ip));
110
111   /* FIXUP: message header export_time */ 
112   /* FIXUP: message header sequence_number */
113   h->domain_id = clib_host_to_net_u32 (stream->domain_id);
114
115   /* Take another trip through the mask and build the template */
116   ip_start = virt_mask + sizeof(ethernet_header_t);
117 #define _(field,mask,item,length)                                             \
118   if (((u8 *)&field >= real_mask) && (memcmp(&field, &mask, length) == 0))    \
119     {                                                                         \
120       f->e_id_length = ipfix_e_id_length (0 /* enterprise */,                 \
121                                           item, length);                      \
122       f++;                                                                    \
123     }
124   foreach_ipfix_field;
125 #undef _
126
127   /* Add packetTotalCount manually */
128   f->e_id_length = ipfix_e_id_length (0 /* enterprise */, packetTotalCount, 8);
129   f++;
130
131   /* Back to the template packet... */
132   ip = (ip4_header_t *) &tp->ip4;
133   udp = (udp_header_t *) (ip+1);
134   
135   ASSERT (f - first_field);
136   /* Field count in this template */
137   t->id_count = ipfix_id_count (fr->template_id, f - first_field);
138
139   /* set length in octets*/
140   s->set_id_length = ipfix_set_id_length (2 /* set_id */, (u8 *) f - (u8 *)s);
141
142   /* message length in octets */
143   h->version_length = version_length ((u8 *)f - (u8 *)h);
144
145   ip->length = clib_host_to_net_u16 ((u8 *)f - (u8 *)ip);
146   ip->checksum = ip4_header_checksum (ip);
147
148   return rewrite;
149 }
150
151 vlib_frame_t * ipfix_classify_send_flows (flow_report_main_t * frm,
152                                           flow_report_t * fr,
153                                           vlib_frame_t * f,
154                                           u32 * to_next,
155                                           u32 node_index)
156 {
157   flow_report_classify_main_t * fcm = &flow_report_classify_main;
158   vnet_classify_main_t * vcm = &vnet_classify_main;
159   u32 flow_table_index = fr->opaque.as_uword;
160   vnet_classify_table_t * t;
161   vnet_classify_bucket_t * b;
162   vnet_classify_entry_t * v, * save_v;
163   vlib_buffer_t *b0 = 0;
164   u32 next_offset = 0;
165   u32 record_offset = 0;
166   u32 bi0 = ~0;
167   int i, j, k;
168   ip4_ipfix_template_packet_t * tp;
169   ipfix_message_header_t * h = 0;
170   ipfix_set_header_t * s = 0;
171   u8 * ip_start;
172   ip4_header_t * ip;
173   ip6_header_t * ip6;
174   tcpudp_header_t * tcpudp;
175   udp_header_t * udp;
176   int field_index;
177   u32 records_this_buffer;
178   u16 new_l0, old_l0;
179   ip_csum_t sum0;
180   vlib_main_t * vm = frm->vlib_main;
181   flow_report_stream_t * stream;
182   u8 ip_version;
183   u8 transport_protocol;
184   u8 * virt_key;
185
186   stream = &frm->streams[fr->stream_index];
187
188   ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
189
190   ip_version = table->ip_version;
191   transport_protocol = table->transport_protocol;
192
193   t = pool_elt_at_index (vcm->tables, table->classify_table_index);
194   
195   while (__sync_lock_test_and_set (t->writer_lock, 1))
196     ; 
197   
198   for (i = 0; i < t->nbuckets; i++)
199     {
200       b = &t->buckets [i];
201       if (b->offset == 0)
202         continue;
203       
204       save_v = vnet_classify_get_entry (t, b->offset);
205       for (j = 0; j < (1<<b->log2_pages); j++)
206         {
207           for (k = 0; k < t->entries_per_page; k++)
208             {
209               v = vnet_classify_entry_at_index 
210                 (t, save_v, j*t->entries_per_page + k);
211               
212               if (vnet_classify_entry_is_free (v))
213                 continue;
214               
215               /* OK, we have something to send... */
216               if (PREDICT_FALSE (b0 == 0))
217                 {
218                   if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
219                     goto flush;
220                   b0 = vlib_get_buffer (vm, bi0);
221                   
222                   u32 copy_len = sizeof(ip4_header_t) +
223                                  sizeof(udp_header_t) +
224                                  sizeof(ipfix_message_header_t);
225                   clib_memcpy (b0->data, fr->rewrite, copy_len);
226                   b0->current_data = 0;
227                   b0->current_length = copy_len;
228                   b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
229                   vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
230                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
231                   
232                   tp = vlib_buffer_get_current (b0);
233                   ip = (ip4_header_t *) &tp->ip4;
234                   udp = (udp_header_t *) (ip+1);
235                   h = (ipfix_message_header_t *)(udp+1);
236                   s = (ipfix_set_header_t *)(h+1);
237                   
238                   /* FIXUP: message header export_time */ 
239                   h->export_time = (u32) 
240                     (((f64)frm->unix_time_0) + 
241                      (vlib_time_now(frm->vlib_main) - frm->vlib_time_0));
242                   h->export_time = clib_host_to_net_u32(h->export_time);
243                   
244                   /* FIXUP: message header sequence_number */
245                   h->sequence_number = stream->sequence_number;
246                   h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
247
248                   next_offset = (u32) (((u8 *)(s+1)) - (u8 *)tp);
249                   record_offset = next_offset;
250                   records_this_buffer = 0;
251                 }
252
253               field_index = 0;
254               virt_key = (u8 *)(v->key - t->skip_n_vectors);
255               ip_start = virt_key + sizeof(ethernet_header_t);
256 #define _(field,mask,item,length)                                       \
257               if (clib_bitmap_get (fr->fields_to_send, field_index))    \
258                 {                                                       \
259                   clib_memcpy (b0->data + next_offset, &field,          \
260                           length);                                      \
261                   next_offset += length;                                \
262                 }                                                       \
263               field_index++;
264               foreach_ipfix_field;
265 #undef _
266
267               /* Add packetTotalCount manually */
268               {
269                 u64 packets = clib_host_to_net_u64 (v->hits);
270                 clib_memcpy (b0->data + next_offset, &packets, sizeof (packets));
271                 next_offset += sizeof (packets);
272               }
273               records_this_buffer++;
274               stream->sequence_number++;
275               
276               /* Next record will have the same size as this record */
277               u32 next_record_size = next_offset - record_offset;
278               record_offset = next_offset;
279
280               if (next_offset + next_record_size > frm->path_mtu)
281                 {
282                   s->set_id_length = ipfix_set_id_length (fr->template_id,
283                                                           next_offset - 
284                                                           (sizeof (*ip) + sizeof (*udp) +
285                                                            sizeof (*h)));
286                   h->version_length = version_length (next_offset -
287                                                       (sizeof (*ip) + sizeof (*udp)));
288                   b0->current_length = next_offset;
289                   b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
290                   
291                   tp = vlib_buffer_get_current (b0);
292                   ip = (ip4_header_t *) &tp->ip4;
293                   udp = (udp_header_t *) (ip+1);
294                   
295                   sum0 = ip->checksum;
296                   old_l0 = ip->length;
297                   new_l0 = 
298                     clib_host_to_net_u16 ((u16)next_offset);
299                   
300                   sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
301                                          length /* changed member */);
302                   
303                   ip->checksum = ip_csum_fold (sum0);
304                   ip->length = new_l0;
305                   udp->length = 
306                       clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
307
308                   if (frm->udp_checksum)
309                     {
310                       /* RFC 7011 section 10.3.2. */
311                       udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
312                       if (udp->checksum == 0)
313                         udp->checksum = 0xffff;
314                     }
315
316                   ASSERT (ip->checksum == ip4_header_checksum (ip));
317
318                   to_next[0] = bi0;
319                   f->n_vectors++;
320                   to_next++;
321                   
322                   if (f->n_vectors == VLIB_FRAME_SIZE)
323                     {
324                       vlib_put_frame_to_node (vm, node_index, f);
325                       f = vlib_get_frame_to_node (vm, node_index);
326                       f->n_vectors = 0;
327                       to_next = vlib_frame_vector_args (f);
328                     }
329                   b0 = 0;
330                   bi0 = ~0;
331                 }
332             }
333         }
334     }
335   
336  flush:
337   if (b0)
338     {
339         s->set_id_length = ipfix_set_id_length (fr->template_id,
340                                                 next_offset - 
341                                                 (sizeof (*ip) + sizeof (*udp) +
342                                                  sizeof (*h)));
343         h->version_length = version_length (next_offset -
344                                             (sizeof (*ip) + sizeof (*udp)));
345       b0->current_length = next_offset;
346       b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
347       
348       tp = vlib_buffer_get_current (b0);
349       ip = (ip4_header_t *) &tp->ip4;
350       udp = (udp_header_t *) (ip+1);
351       
352       sum0 = ip->checksum;
353       old_l0 = ip->length;
354       new_l0 = clib_host_to_net_u16 ((u16)next_offset);
355       
356       sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
357                              length /* changed member */);
358       
359       ip->checksum = ip_csum_fold (sum0);
360       ip->length = new_l0;
361       udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
362
363       if (frm->udp_checksum)
364         {
365           /* RFC 7011 section 10.3.2. */
366           udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
367           if (udp->checksum == 0)
368           udp->checksum = 0xffff;
369         }
370
371       ASSERT (ip->checksum == ip4_header_checksum (ip));
372       
373       to_next[0] = bi0;
374       f->n_vectors++;
375       
376       b0 = 0;
377       bi0 = ~0;
378     }
379   
380   *(t->writer_lock) = 0;
381   return f;
382 }
383
384 static clib_error_t *
385 ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
386                                          unformat_input_t * input,
387                                          vlib_cli_command_t * cmd)
388 {
389   flow_report_classify_main_t *fcm = &flow_report_classify_main;
390   flow_report_main_t *frm = &flow_report_main;
391   vnet_flow_report_add_del_args_t args;
392   ipfix_classify_table_t * table;
393   int rv;
394   int is_add = -1;
395   u32 classify_table_index = ~0;
396   u8 ip_version = 0;
397   u8 transport_protocol = 255;
398   clib_error_t * error = 0;
399
400   if (fcm->src_port == 0)
401     clib_error_return (0, "call 'set ipfix classify stream' first");
402
403   memset (&args, 0, sizeof (args));
404
405   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
406     if (unformat (input, "add"))
407       is_add = 1;
408     else if (unformat (input, "del"))
409       is_add = 0;
410     else if (unformat (input, "%d", &classify_table_index))
411       ;
412     else if (unformat (input, "ip4"))
413       ip_version = 4;
414     else if (unformat (input, "ip6"))
415       ip_version = 6;
416     else if (unformat (input, "tcp"))
417       transport_protocol = 6;
418     else if (unformat (input, "udp"))
419       transport_protocol = 17;
420     else
421       return clib_error_return (0, "unknown input `%U'",
422                                 format_unformat_error, input);
423   }
424
425   if (is_add == -1)
426     return clib_error_return (0, "expecting: add|del");
427   if (classify_table_index == ~0)
428     return clib_error_return (0, "classifier table not specified");
429   if (ip_version == 0)
430     return clib_error_return (0, "IP version not specified");
431
432   table = 0;
433   int i;
434   for (i = 0; i < vec_len(fcm->tables); i++)
435     if (ipfix_classify_table_index_valid(i))
436       if (fcm->tables[i].classify_table_index == classify_table_index) {
437         table = &fcm->tables[i];
438         break;
439       }
440
441   if (is_add) {
442     if (table)
443       return clib_error_return (0, "Specified classifier table already used");
444     table = ipfix_classify_add_table();
445     table->classify_table_index = classify_table_index;
446   } else {
447     if (!table)
448       return clib_error_return (0, "Specified classifier table not registered");
449   }
450
451   table->ip_version = ip_version;
452   table->transport_protocol = transport_protocol;
453
454   args.opaque.as_uword = table - fcm->tables;
455   args.rewrite_callback = ipfix_classify_template_rewrite;
456   args.flow_data_callback = ipfix_classify_send_flows;
457   args.is_add = is_add;
458   args.domain_id = fcm->domain_id;
459   args.src_port = fcm->src_port;
460
461   rv = vnet_flow_report_add_del (frm, &args, NULL);
462
463   error = flow_report_add_del_error_to_clib_error(rv);
464
465   /* If deleting, or add failed */
466   if (is_add == 0 || (rv && is_add))
467     ipfix_classify_delete_table (table - fcm->tables);
468
469   return error;
470 }
471
472 VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
473   .path = "ipfix classify table",
474   .short_help = "ipfix classify table add|del <table-index>",
475   .function = ipfix_classify_table_add_del_command_fn,
476 };
477
478 static clib_error_t *
479 set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
480                                       unformat_input_t * input,
481                                       vlib_cli_command_t * cmd)
482 {
483   flow_report_classify_main_t *fcm = &flow_report_classify_main;
484   flow_report_main_t *frm = &flow_report_main;
485   u32 domain_id = 1;
486   u32 src_port = UDP_DST_PORT_ipfix;
487
488   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
489     if (unformat (input, "domain %d", &domain_id))
490       ;
491     else if (unformat (input, "src-port %d", &src_port))
492       ;
493     else
494       return clib_error_return (0, "unknown input `%U'",
495                                 format_unformat_error, input);
496   }
497
498   if (fcm->src_port != 0 &&
499       (fcm->domain_id != domain_id ||
500        fcm->src_port != (u16)src_port)) {
501     int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
502                                  domain_id, (u16)src_port);
503     ASSERT (rv == 0);
504   }
505
506   fcm->domain_id = domain_id;
507   fcm->src_port = (u16)src_port;
508
509   return 0;
510 }
511
512 VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
513   .path = "set ipfix classify stream",
514   .short_help = "set ipfix classify stream"
515                 "[domain <domain-id>] [src-port <src-port>]",
516   .function = set_ipfix_classify_stream_command_fn,
517 };
518
519 static clib_error_t *
520 flow_report_classify_init (vlib_main_t *vm)
521 {
522   clib_error_t * error;
523
524   if ((error = vlib_call_init_function (vm, flow_report_init)))
525     return error;
526
527   return 0;
528 }
529
530 VLIB_INIT_FUNCTION (flow_report_classify_init);