dpdk: Add support for Mellanox ConnectX-4 devices
[vpp.git] / vnet / vnet / flow / flow_report_classify.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <vnet/flow/flow_report.h>
16 #include <vnet/flow/flow_report_classify.h>
17 #include <vnet/api_errno.h>
18
19 /* Common prefix of tcp and udp headers
20  * containing only source and destination port fields */
21 typedef struct {
22   u16 src_port, dst_port;
23 } tcpudp_header_t;
24
/* File-scope state for classifier-table IPFIX reporting; the functions in
 * this file read its tables, domain_id and src_port members. */
flow_report_classify_main_t flow_report_classify_main;
26
27 u8 * ipfix_classify_template_rewrite (flow_report_main_t * frm,
28                                       flow_report_t * fr,
29                                       ip4_address_t * collector_address,
30                                       ip4_address_t * src_address,
31                                       u16 collector_port)
32 {
33   flow_report_classify_main_t * fcm = &flow_report_classify_main;
34   vnet_classify_table_t * tblp;
35   vnet_classify_main_t * vcm = &vnet_classify_main;
36   u32 flow_table_index = fr->opaque.as_uword;
37   u8 * ip_start;
38   ip4_header_t * ip;
39   ip6_header_t * ip6;
40   tcpudp_header_t * tcpudp;
41   udp_header_t * udp;
42   ipfix_message_header_t * h;
43   ipfix_set_header_t * s;
44   ipfix_template_header_t * t;
45   ipfix_field_specifier_t * f;
46   ipfix_field_specifier_t * first_field;
47   u8 * rewrite = 0;
48   ip4_ipfix_template_packet_t * tp;
49   i32 l3_offset = -2;  /* sizeof (ethernet_header_t) - sizeof (u32x4) */
50   u32 field_count = 0;
51   u32 field_index = 0;
52   flow_report_stream_t * stream;
53   u8 ip_version;
54   u8 transport_protocol;
55
56   stream = &frm->streams[fr->stream_index];
57
58   ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
59
60   ip_version = table->ip_version;
61   transport_protocol = table->transport_protocol;
62
63   tblp = pool_elt_at_index (vcm->tables, table->classify_table_index);
64
65   /* 
66    * Mumble, assumes that we're not classifying on L2 or first 2 octets
67    * of L3..
68    */
69
70   /* Determine field count */
71   ip_start = ((u8 *)(tblp->mask)) + l3_offset;
72 #define _(field,mask,item,length)                                       \
73   if (memcmp(&field, &mask, length) == 0)                               \
74     {                                                                   \
75       field_count++;                                                    \
76                                                                         \
77       fr->fields_to_send = clib_bitmap_set (fr->fields_to_send,         \
78                                             field_index, 1);            \
79     }                                                                   \
80   field_index++;
81   foreach_ipfix_field;
82 #undef _
83
84   /* Add packetTotalCount manually */
85   field_count += 1;
86
87   /* $$$ enterprise fields, at some later date */
88
89   /* allocate rewrite space */
90   vec_validate_aligned (rewrite, 
91                         sizeof (ip4_ipfix_template_packet_t) 
92                         + field_count * sizeof (ipfix_field_specifier_t) - 1,
93                         CLIB_CACHE_LINE_BYTES);
94
95   tp = (ip4_ipfix_template_packet_t *) rewrite;
96   ip = (ip4_header_t *) &tp->ip4;
97   udp = (udp_header_t *) (ip+1);
98   h = (ipfix_message_header_t *)(udp+1);
99   s = (ipfix_set_header_t *)(h+1);
100   t = (ipfix_template_header_t *)(s+1);
101   first_field = f = (ipfix_field_specifier_t *)(t+1);
102
103   ip->ip_version_and_header_length = 0x45;
104   ip->ttl = 254;
105   ip->protocol = IP_PROTOCOL_UDP;
106   ip->src_address.as_u32 = src_address->as_u32;
107   ip->dst_address.as_u32 = collector_address->as_u32;
108   udp->src_port = clib_host_to_net_u16 (stream->src_port);
109   udp->dst_port = clib_host_to_net_u16 (collector_port);
110   udp->length = clib_host_to_net_u16 (vec_len(rewrite) - sizeof (*ip));
111
112   /* FIXUP: message header export_time */ 
113   /* FIXUP: message header sequence_number */
114   h->domain_id = clib_host_to_net_u32 (stream->domain_id);
115
116   /* Take another trip through the mask and build the template */
117   ip_start = ((u8 *)(tblp->mask)) + l3_offset;
118 #define _(field,mask,item,length)                               \
119   if (memcmp(&field, &mask, length) == 0)                       \
120     {                                                           \
121       f->e_id_length = ipfix_e_id_length (0 /* enterprise */,   \
122                                           item, length);        \
123       f++;                                                      \
124     }
125   foreach_ipfix_field;
126 #undef _
127
128   /* Add packetTotalCount manually */
129   f->e_id_length = ipfix_e_id_length (0 /* enterprise */, packetTotalCount, 8);
130   f++;
131
132   /* Back to the template packet... */
133   ip = (ip4_header_t *) &tp->ip4;
134   udp = (udp_header_t *) (ip+1);
135   
136   ASSERT (f - first_field);
137   /* Field count in this template */
138   t->id_count = ipfix_id_count (fr->template_id, f - first_field);
139
140   /* set length in octets*/
141   s->set_id_length = ipfix_set_id_length (2 /* set_id */, (u8 *) f - (u8 *)s);
142
143   /* message length in octets */
144   h->version_length = version_length ((u8 *)f - (u8 *)h);
145
146   ip->length = clib_host_to_net_u16 ((u8 *)f - (u8 *)ip);
147   ip->checksum = ip4_header_checksum (ip);
148
149   return rewrite;
150 }
151
152 vlib_frame_t * ipfix_classify_send_flows (flow_report_main_t * frm,
153                                           flow_report_t * fr,
154                                           vlib_frame_t * f,
155                                           u32 * to_next,
156                                           u32 node_index)
157 {
158   flow_report_classify_main_t * fcm = &flow_report_classify_main;
159   vnet_classify_main_t * vcm = &vnet_classify_main;
160   u32 flow_table_index = fr->opaque.as_uword;
161   vnet_classify_table_t * t;
162   vnet_classify_bucket_t * b;
163   vnet_classify_entry_t * v, * save_v;
164   vlib_buffer_t *b0 = 0;
165   u32 next_offset = 0;
166   u32 record_offset = 0;
167   u32 bi0 = ~0;
168   int i, j, k;
169   ip4_ipfix_template_packet_t * tp;
170   ipfix_message_header_t * h = 0;
171   ipfix_set_header_t * s = 0;
172   u8 * ip_start;
173   ip4_header_t * ip;
174   ip6_header_t * ip6;
175   tcpudp_header_t * tcpudp;
176   udp_header_t * udp;
177   int field_index;
178   u32 records_this_buffer;
179   u16 new_l0, old_l0;
180   ip_csum_t sum0;
181   vlib_main_t * vm = frm->vlib_main;
182   flow_report_stream_t * stream;
183   u8 ip_version;
184   u8 transport_protocol;
185
186   stream = &frm->streams[fr->stream_index];
187
188   ipfix_classify_table_t * table = &fcm->tables[flow_table_index];
189
190   ip_version = table->ip_version;
191   transport_protocol = table->transport_protocol;
192
193   t = pool_elt_at_index (vcm->tables, table->classify_table_index);
194   
195   while (__sync_lock_test_and_set (t->writer_lock, 1))
196     ; 
197   
198   for (i = 0; i < t->nbuckets; i++)
199     {
200       b = &t->buckets [i];
201       if (b->offset == 0)
202         continue;
203       
204       save_v = vnet_classify_get_entry (t, b->offset);
205       for (j = 0; j < (1<<b->log2_pages); j++)
206         {
207           for (k = 0; k < t->entries_per_page; k++)
208             {
209               v = vnet_classify_entry_at_index 
210                 (t, save_v, j*t->entries_per_page + k);
211               
212               if (vnet_classify_entry_is_free (v))
213                 continue;
214               
215               /* OK, we have something to send... */
216               if (PREDICT_FALSE (b0 == 0))
217                 {
218                   if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
219                     goto flush;
220                   b0 = vlib_get_buffer (vm, bi0);
221                   
222                   u32 copy_len = sizeof(ip4_header_t) +
223                                  sizeof(udp_header_t) +
224                                  sizeof(ipfix_message_header_t);
225                   clib_memcpy (b0->data, fr->rewrite, copy_len);
226                   b0->current_data = 0;
227                   b0->current_length = copy_len;
228                   b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
229                   vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
230                   vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
231                   
232                   tp = vlib_buffer_get_current (b0);
233                   ip = (ip4_header_t *) &tp->ip4;
234                   udp = (udp_header_t *) (ip+1);
235                   h = (ipfix_message_header_t *)(udp+1);
236                   s = (ipfix_set_header_t *)(h+1);
237                   
238                   /* FIXUP: message header export_time */ 
239                   h->export_time = (u32) 
240                     (((f64)frm->unix_time_0) + 
241                      (vlib_time_now(frm->vlib_main) - frm->vlib_time_0));
242                   h->export_time = clib_host_to_net_u32(h->export_time);
243                   
244                   /* FIXUP: message header sequence_number */
245                   h->sequence_number = stream->sequence_number;
246                   h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
247
248                   next_offset = (u32) (((u8 *)(s+1)) - (u8 *)tp);
249                   record_offset = next_offset;
250                   records_this_buffer = 0;
251                 }
252
253               field_index = 0;
254               ip_start = ((u8 *)v->key) - 2;
255 #define _(field,mask,item,length)                                       \
256               if (clib_bitmap_get (fr->fields_to_send, field_index))    \
257                 {                                                       \
258                   clib_memcpy (b0->data + next_offset, &field,          \
259                           length);                                      \
260                   next_offset += length;                                \
261                 }                                                       \
262               field_index++;
263               foreach_ipfix_field;
264 #undef _
265
266               /* Add packetTotalCount manually */
267               {
268                 u64 packets = clib_host_to_net_u64 (v->hits);
269                 clib_memcpy (b0->data + next_offset, &packets, sizeof (packets));
270                 next_offset += sizeof (packets);
271               }
272               records_this_buffer++;
273               stream->sequence_number++;
274               
275               /* Next record will have the same size as this record */
276               u32 next_record_size = next_offset - record_offset;
277               record_offset = next_offset;
278
279               if (next_offset + next_record_size > frm->path_mtu)
280                 {
281                   s->set_id_length = ipfix_set_id_length (fr->template_id,
282                                                           next_offset - 
283                                                           (sizeof (*ip) + sizeof (*udp) +
284                                                            sizeof (*h)));
285                   h->version_length = version_length (next_offset -
286                                                       (sizeof (*ip) + sizeof (*udp)));
287                   b0->current_length = next_offset;
288                   b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
289                   
290                   tp = vlib_buffer_get_current (b0);
291                   ip = (ip4_header_t *) &tp->ip4;
292                   udp = (udp_header_t *) (ip+1);
293                   
294                   sum0 = ip->checksum;
295                   old_l0 = ip->length;
296                   new_l0 = 
297                     clib_host_to_net_u16 ((u16)next_offset);
298                   
299                   sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
300                                          length /* changed member */);
301                   
302                   ip->checksum = ip_csum_fold (sum0);
303                   ip->length = new_l0;
304                   udp->length = 
305                       clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
306
307                   if (frm->udp_checksum)
308                     {
309                       /* RFC 7011 section 10.3.2. */
310                       udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
311                       if (udp->checksum == 0)
312                         udp->checksum = 0xffff;
313                     }
314
315                   ASSERT (ip->checksum == ip4_header_checksum (ip));
316
317                   to_next[0] = bi0;
318                   f->n_vectors++;
319                   to_next++;
320                   
321                   if (f->n_vectors == VLIB_FRAME_SIZE)
322                     {
323                       vlib_put_frame_to_node (vm, node_index, f);
324                       f = vlib_get_frame_to_node (vm, node_index);
325                       f->n_vectors = 0;
326                       to_next = vlib_frame_vector_args (f);
327                     }
328                   b0 = 0;
329                   bi0 = ~0;
330                 }
331             }
332         }
333     }
334   
335  flush:
336   if (b0)
337     {
338         s->set_id_length = ipfix_set_id_length (fr->template_id,
339                                                 next_offset - 
340                                                 (sizeof (*ip) + sizeof (*udp) +
341                                                  sizeof (*h)));
342         h->version_length = version_length (next_offset -
343                                             (sizeof (*ip) + sizeof (*udp)));
344       b0->current_length = next_offset;
345       b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
346       
347       tp = vlib_buffer_get_current (b0);
348       ip = (ip4_header_t *) &tp->ip4;
349       udp = (udp_header_t *) (ip+1);
350       
351       sum0 = ip->checksum;
352       old_l0 = ip->length;
353       new_l0 = clib_host_to_net_u16 ((u16)next_offset);
354       
355       sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
356                              length /* changed member */);
357       
358       ip->checksum = ip_csum_fold (sum0);
359       ip->length = new_l0;
360       udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
361
362       if (frm->udp_checksum)
363         {
364           /* RFC 7011 section 10.3.2. */
365           udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
366           if (udp->checksum == 0)
367           udp->checksum = 0xffff;
368         }
369
370       ASSERT (ip->checksum == ip4_header_checksum (ip));
371       
372       to_next[0] = bi0;
373       f->n_vectors++;
374       
375       b0 = 0;
376       bi0 = ~0;
377     }
378   
379   *(t->writer_lock) = 0;
380   return f;
381 }
382
383 static clib_error_t *
384 ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
385                                          unformat_input_t * input,
386                                          vlib_cli_command_t * cmd)
387 {
388   flow_report_classify_main_t *fcm = &flow_report_classify_main;
389   flow_report_main_t *frm = &flow_report_main;
390   vnet_flow_report_add_del_args_t args;
391   ipfix_classify_table_t * table;
392   int rv;
393   int is_add = -1;
394   u32 classify_table_index = ~0;
395   u8 ip_version = 0;
396   u8 transport_protocol = 255;
397   clib_error_t * error = 0;
398
399   if (fcm->src_port == 0)
400     clib_error_return (0, "call 'set ipfix classify stream' first");
401
402   memset (&args, 0, sizeof (args));
403
404   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
405     if (unformat (input, "add"))
406       is_add = 1;
407     else if (unformat (input, "del"))
408       is_add = 0;
409     else if (unformat (input, "%d", &classify_table_index))
410       ;
411     else if (unformat (input, "ip4"))
412       ip_version = 4;
413     else if (unformat (input, "ip6"))
414       ip_version = 6;
415     else if (unformat (input, "tcp"))
416       transport_protocol = 6;
417     else if (unformat (input, "udp"))
418       transport_protocol = 17;
419     else
420       return clib_error_return (0, "unknown input `%U'",
421                                 format_unformat_error, input);
422   }
423
424   if (is_add == -1)
425     return clib_error_return (0, "expecting: add|del");
426   if (classify_table_index == ~0)
427     return clib_error_return (0, "classifier table not specified");
428   if (ip_version == 0)
429     return clib_error_return (0, "IP version not specified");
430
431   table = 0;
432   int i;
433   for (i = 0; i < vec_len(fcm->tables); i++)
434     if (ipfix_classify_table_index_valid(i))
435       if (fcm->tables[i].classify_table_index == classify_table_index) {
436         table = &fcm->tables[i];
437         break;
438       }
439
440   if (is_add) {
441     if (table)
442       return clib_error_return (0, "Specified classifier table already used");
443     table = ipfix_classify_add_table();
444     table->classify_table_index = classify_table_index;
445   } else {
446     if (!table)
447       return clib_error_return (0, "Specified classifier table not registered");
448   }
449
450   table->ip_version = ip_version;
451   table->transport_protocol = transport_protocol;
452
453   args.opaque.as_uword = table - fcm->tables;
454   args.rewrite_callback = ipfix_classify_template_rewrite;
455   args.flow_data_callback = ipfix_classify_send_flows;
456   args.is_add = is_add;
457   args.domain_id = fcm->domain_id;
458   args.src_port = fcm->src_port;
459
460   rv = vnet_flow_report_add_del (frm, &args);
461
462   error = flow_report_add_del_error_to_clib_error(rv);
463
464   /* If deleting, or add failed */
465   if (is_add == 0 || (rv && is_add))
466     ipfix_classify_delete_table (table - fcm->tables);
467
468   return error;
469 }
470
471 VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
472   .path = "ipfix classify table",
473   .short_help = "ipfix classify table add|del <table-index>",
474   .function = ipfix_classify_table_add_del_command_fn,
475 };
476
477 static clib_error_t *
478 set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
479                                       unformat_input_t * input,
480                                       vlib_cli_command_t * cmd)
481 {
482   flow_report_classify_main_t *fcm = &flow_report_classify_main;
483   flow_report_main_t *frm = &flow_report_main;
484   u32 domain_id = 1;
485   u32 src_port = UDP_DST_PORT_ipfix;
486
487   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
488     if (unformat (input, "domain %d", &domain_id))
489       ;
490     else if (unformat (input, "src-port %d", &src_port))
491       ;
492     else
493       return clib_error_return (0, "unknown input `%U'",
494                                 format_unformat_error, input);
495   }
496
497   if (fcm->src_port != 0 &&
498       (fcm->domain_id != domain_id ||
499        fcm->src_port != (u16)src_port)) {
500     int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
501                                  domain_id, (u16)src_port);
502     ASSERT (rv == 0);
503   }
504
505   fcm->domain_id = domain_id;
506   fcm->src_port = (u16)src_port;
507
508   return 0;
509 }
510
511 VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
512   .path = "set ipfix classify stream",
513   .short_help = "set ipfix classify stream"
514                 "[domain <domain-id>] [src-port <src-port>]",
515   .function = set_ipfix_classify_stream_command_fn,
516 };
517
518 static clib_error_t *
519 flow_report_classify_init (vlib_main_t *vm)
520 {
521   clib_error_t * error;
522
523   if ((error = vlib_call_init_function (vm, flow_report_init)))
524     return error;
525
526   return 0;
527 }
528
/* Register flow_report_classify_init as a vlib init function */
VLIB_INIT_FUNCTION (flow_report_classify_init);