/* buffer chain linearization — vpp.git: src/vnet/ip/ip4_reassembly.c */
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 /**
17  * @file
18  * @brief IPv4 Reassembly.
19  *
20  * This file contains the source code for IPv4 reassembly.
21  */
22
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vppinfra/bihash_16_8.h>
27 #include <vnet/ip/ip4_reassembly.h>
28
29 #define MSEC_PER_SEC 1000
30 #define IP4_REASS_TIMEOUT_DEFAULT_MS 100
31 #define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
32 #define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
33 #define IP4_REASS_HT_LOAD_FACTOR (0.75)
34
35 #define IP4_REASS_DEBUG_BUFFERS 0
36 #if IP4_REASS_DEBUG_BUFFERS
37 #define IP4_REASS_DEBUG_BUFFER(bi, what)             \
38   do                                                 \
39     {                                                \
40       u32 _bi = bi;                                  \
41       printf (#what "buffer %u", _bi);               \
42       vlib_buffer_t *_b = vlib_get_buffer (vm, _bi); \
43       while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)   \
44         {                                            \
45           _bi = _b->next_buffer;                     \
46           printf ("[%u]", _bi);                      \
47           _b = vlib_get_buffer (vm, _bi);            \
48         }                                            \
49       printf ("\n");                                 \
50       fflush (stdout);                               \
51     }                                                \
52   while (0)
53 #else
54 #define IP4_REASS_DEBUG_BUFFER(...)
55 #endif
56
// Internal return codes of the reassembly helpers; only OK lets the
// caller continue, the other values make the node drop the reassembly.
typedef enum
{
  IP4_REASS_RC_OK,              // operation succeeded
  IP4_REASS_RC_INTERNAL_ERROR,  // inconsistent reassembly state detected
  IP4_REASS_RC_NO_BUF,          // buffer allocation failed (linearization)
} ip4_reass_rc_t;
63
// Hash key identifying one reassembly context. The struct view names the
// fields; the as_u64[2] view is what is copied into the bihash_16_8 key.
typedef struct
{
  union
  {
    struct
    {
      u32 xx_id;          // per-interface discriminator (sw_if_index at key build site)
      ip4_address_t src;  // source address of the fragments
      ip4_address_t dst;  // destination address of the fragments
      u16 frag_id;        // IPv4 fragment_id field
      u8 proto;           // IPv4 protocol field
      u8 unused;          // padding so the key fills exactly 16 bytes
    };
    u64 as_u64[2];        // raw view used for bihash search/add/delete
  };
} ip4_reass_key_t;
80
81 always_inline u32
82 ip4_reass_buffer_get_data_offset (vlib_buffer_t * b)
83 {
84   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
85   return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first;
86 }
87
88 always_inline u16
89 ip4_reass_buffer_get_data_len (vlib_buffer_t * b)
90 {
91   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
92   return clib_min (vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) -
93     (vnb->ip.reass.fragment_first + ip4_reass_buffer_get_data_offset (b)) + 1;
94 }
95
// State of one in-progress reassembly. Fragments are kept as a linked list
// of "ranges" threaded through the buffers' vnet opaque (next_range_bi).
typedef struct
{
  // hash table key
  ip4_reass_key_t key;
  // time when last packet was received
  f64 last_heard;
  // internal id of this reassembly
  u64 id;
  // buffer index of first buffer in this reassembly context
  u32 first_bi;
  // last octet of packet, ~0 until fragment without more_fragments arrives
  u32 last_packet_octet;
  // length of data collected so far
  u32 data_len;
  // trace operation counter
  u32 trace_op_counter;
  // next index - used by non-feature node
  u8 next_index;
  // minimum fragment length for this reassembly - used to estimate MTU
  u16 min_fragment_length;
} ip4_reass_t;
117
// Per-worker-thread reassembly state; the spinlock serializes access from
// the node dispatch path (see ip4_reassembly_inline).
typedef struct
{
  ip4_reass_t *pool;    // pool of reassembly contexts owned by this thread
  u32 reass_n;          // number of live contexts (bounded by max_reass_n)
  u32 id_counter;       // source of per-thread unique reassembly ids
  clib_spinlock_t lock; // protects pool/reass_n/id_counter
} ip4_reass_per_thread_t;
125
// Global reassembly state: configuration, the shared key->context hash,
// per-thread pools, and convenience node indices.
typedef struct
{
  // IPv4 config
  u32 timeout_ms;               // reassembly timeout in milliseconds
  f64 timeout;                  // same timeout pre-converted to seconds
  u32 expire_walk_interval_ms;  // period of the expiry walk process
  u32 max_reass_n;              // cap on concurrent reassemblies per thread

  // IPv4 runtime
  clib_bihash_16_8_t hash;      // ip4_reass_key_t -> pool index
  // per-thread data
  ip4_reass_per_thread_t *per_thread_data;

  // convenience
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  // node index of ip4-drop node
  u32 ip4_drop_idx;
  u32 ip4_reass_expire_node_idx;
} ip4_reass_main_t;
147
148 ip4_reass_main_t ip4_reass_main;
149
// Next-node indices for the reassembly graph node.
typedef enum
{
  IP4_REASSEMBLY_NEXT_INPUT,  // hand fully reassembled packet back to ip4 input
  IP4_REASSEMBLY_NEXT_DROP,   // malformed/duplicate fragment
  IP4_REASSEMBLY_N_NEXT,
} ip4_reass_next_t;
156
// Operations recorded in packet traces (see ip4_reass_add_trace).
typedef enum
{
  RANGE_NEW,      // fragment created a new range
  RANGE_SHRINK,   // an existing range was trimmed to resolve an overlap
  RANGE_DISCARD,  // an existing range was dropped entirely
  RANGE_OVERLAP,  // fragment fully covered by existing data; ignored
  FINALIZE,       // reassembly completed
} ip4_reass_trace_operation_e;
165
// Snapshot of one range, captured for tracing.
typedef struct
{
  u16 range_first; // first byte offset covered by the range
  u16 range_last;  // last byte offset covered by the range
  u32 range_bi;    // buffer index heading the range
  i32 data_offset; // see ip4_reass_buffer_get_data_offset
  u32 data_len;    // see ip4_reass_buffer_get_data_len
  u32 first_bi;    // head buffer of the whole reassembly
} ip4_reass_range_trace_t;
175
// One trace record; formatted by format_ip4_reass_trace.
typedef struct
{
  ip4_reass_trace_operation_e action; // what happened to the range
  u32 reass_id;                       // id of the reassembly context
  ip4_reass_range_trace_t trace_range;
  u32 size_diff;                      // bytes trimmed (RANGE_SHRINK only)
  u32 op_id;                          // monotonically increasing per reassembly
  u32 fragment_first;                 // fragment's own start offset
  u32 fragment_last;                  // fragment's own end offset
  u32 total_data_len;                 // data collected so far in the reassembly
} ip4_reass_trace_t;
187
188 static void
189 ip4_reass_trace_details (vlib_main_t * vm, u32 bi,
190                          ip4_reass_range_trace_t * trace)
191 {
192   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
193   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
194   trace->range_first = vnb->ip.reass.range_first;
195   trace->range_last = vnb->ip.reass.range_last;
196   trace->data_offset = ip4_reass_buffer_get_data_offset (b);
197   trace->data_len = ip4_reass_buffer_get_data_len (b);
198   trace->range_bi = bi;
199 }
200
201 static u8 *
202 format_ip4_reass_range_trace (u8 * s, va_list * args)
203 {
204   ip4_reass_range_trace_t *trace = va_arg (*args, ip4_reass_range_trace_t *);
205   s = format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first,
206               trace->range_last, trace->data_offset, trace->data_len,
207               trace->range_bi);
208   return s;
209 }
210
/**
 * @brief vlib trace format callback for the reassembly node; prints the
 * common header followed by an action-specific line.
 */
u8 *
format_ip4_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_reass_trace_t *t = va_arg (*args, ip4_reass_trace_t *);
  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
  u32 indent = format_get_indent (s);
  s = format (s, "first bi: %u, data len: %u, ip/fragment[%u, %u]",
              t->trace_range.first_bi, t->total_data_len, t->fragment_first,
              t->fragment_last);
  /* no default case: every ip4_reass_trace_operation_e value is handled */
  switch (t->action)
    {
    case RANGE_SHRINK:
      s = format (s, "\n%Ushrink %U by %u", format_white_space, indent,
                  format_ip4_reass_range_trace, &t->trace_range,
                  t->size_diff);
      break;
    case RANGE_DISCARD:
      s = format (s, "\n%Udiscard %U", format_white_space, indent,
                  format_ip4_reass_range_trace, &t->trace_range);
      break;
    case RANGE_NEW:
      s = format (s, "\n%Unew %U", format_white_space, indent,
                  format_ip4_reass_range_trace, &t->trace_range);
      break;
    case RANGE_OVERLAP:
      s = format (s, "\n%Uoverlapping/ignored %U", format_white_space, indent,
                  format_ip4_reass_range_trace, &t->trace_range);
      break;
    case FINALIZE:
      s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
      break;
    }
  return s;
}
247
/**
 * @brief Record one trace entry for buffer @c bi against reassembly
 * @c reass, stamping it with the reassembly's running operation counter.
 * Silently untraces the buffer if its trace slot was already recycled.
 */
static void
ip4_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
                     ip4_reass_main_t * rm, ip4_reass_t * reass, u32 bi,
                     ip4_reass_trace_operation_e action, u32 size_diff)
{
  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
  vnet_buffer_opaque_t *vnb = vnet_buffer (b);
  if (pool_is_free_index (vm->trace_main.trace_buffer_pool, b->trace_index))
    {
      // this buffer's trace is gone
      b->flags &= ~VLIB_BUFFER_IS_TRACED;
      return;
    }
  ip4_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
  t->reass_id = reass->id;
  t->action = action;
  ip4_reass_trace_details (vm, bi, &t->trace_range);
  t->size_diff = size_diff;
  t->op_id = reass->trace_op_counter;
  ++reass->trace_op_counter;
  t->fragment_first = vnb->ip.reass.fragment_first;
  t->fragment_last = vnb->ip.reass.fragment_last;
  t->trace_range.first_bi = reass->first_bi;
  t->total_data_len = reass->data_len;
#if 0
  static u8 *s = NULL;
  s = format (s, "%U", format_ip4_reass_trace, NULL, NULL, t);
  printf ("%.*s\n", vec_len (s), s);
  fflush (stdout);
  vec_reset_length (s);
#endif
}
280
281 always_inline void
282 ip4_reass_free (ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
283                 ip4_reass_t * reass)
284 {
285   clib_bihash_kv_16_8_t kv;
286   kv.key[0] = reass->key.as_u64[0];
287   kv.key[1] = reass->key.as_u64[1];
288   clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
289   pool_put (rt->pool, reass);
290   --rt->reass_n;
291 }
292
293 always_inline void
294 ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
295                       ip4_reass_t * reass)
296 {
297   u32 range_bi = reass->first_bi;
298   vlib_buffer_t *range_b;
299   vnet_buffer_opaque_t *range_vnb;
300   u32 *to_free = NULL;
301   while (~0 != range_bi)
302     {
303       range_b = vlib_get_buffer (vm, range_bi);
304       range_vnb = vnet_buffer (range_b);
305       u32 bi = range_bi;
306       while (~0 != bi)
307         {
308           vec_add1 (to_free, bi);
309           vlib_buffer_t *b = vlib_get_buffer (vm, bi);
310           if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
311             {
312               bi = b->next_buffer;
313               b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
314             }
315           else
316             {
317               bi = ~0;
318             }
319         }
320       range_bi = range_vnb->ip.reass.next_range_bi;
321     }
322   vlib_buffer_free (vm, to_free, vec_len (to_free));
323   vec_free (to_free);
324 }
325
326 ip4_reass_t *
327 ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
328                           ip4_reass_per_thread_t * rt, ip4_reass_key_t * k)
329 {
330   ip4_reass_t *reass = NULL;
331   f64 now = vlib_time_now (rm->vlib_main);
332   clib_bihash_kv_16_8_t kv, value;
333   kv.key[0] = k->as_u64[0];
334   kv.key[1] = k->as_u64[1];
335
336   if (!clib_bihash_search_16_8 (&rm->hash, &kv, &value))
337     {
338       reass = pool_elt_at_index (rt->pool, value.value);
339       if (now > reass->last_heard + rm->timeout)
340         {
341           ip4_reass_on_timeout (vm, rm, reass);
342           ip4_reass_free (rm, rt, reass);
343           reass = NULL;
344         }
345     }
346
347   if (reass)
348     {
349       reass->last_heard = now;
350       return reass;
351     }
352
353   if (rt->reass_n >= rm->max_reass_n)
354     {
355       reass = NULL;
356       return reass;
357     }
358   else
359     {
360       pool_get (rt->pool, reass);
361       clib_memset (reass, 0, sizeof (*reass));
362       reass->id =
363         ((u64) os_get_thread_index () * 1000000000) + rt->id_counter;
364       ++rt->id_counter;
365       reass->first_bi = ~0;
366       reass->last_packet_octet = ~0;
367       reass->data_len = 0;
368       ++rt->reass_n;
369     }
370
371   reass->key.as_u64[0] = kv.key[0] = k->as_u64[0];
372   reass->key.as_u64[1] = kv.key[1] = k->as_u64[1];
373   kv.value = reass - rt->pool;
374   reass->last_heard = now;
375
376   if (clib_bihash_add_del_16_8 (&rm->hash, &kv, 1))
377     {
378       ip4_reass_free (rm, rt, reass);
379       reass = NULL;
380     }
381
382   return reass;
383 }
384
/**
 * @brief Stitch all collected ranges into a single packet.
 *
 * Walks the range list, trims each range's buffer chain at the front
 * (duplicate/overlapped bytes plus the per-fragment IP header) and at the
 * end, links the kept buffers into one chain, rewrites the first buffer's
 * IP header (length, cleared fragment bits, checksum) and linearizes the
 * chain.
 *
 * On success writes the reassembled buffer index to *bi0, the next node to
 * *next0, IP4_ERROR_NONE to *error0 and frees the context. Any internal
 * inconsistency returns IP4_REASS_RC_INTERNAL_ERROR; linearization failure
 * returns IP4_REASS_RC_NO_BUF.
 */
always_inline ip4_reass_rc_t
ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
                    ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
                    ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
                    bool is_feature)
{
  vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
  vlib_buffer_t *last_b = NULL;
  u32 sub_chain_bi = reass->first_bi;
  u32 total_length = 0;
  u32 buf_cnt = 0;
  do
    {
      u32 tmp_bi = sub_chain_bi;
      vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi);
      ip4_header_t *ip = vlib_buffer_get_current (tmp);
      vnet_buffer_opaque_t *vnb = vnet_buffer (tmp);
      /* a valid range starts at or after its fragment and ends inside it */
      if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
          !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
        {
          return IP4_REASS_RC_INTERNAL_ERROR;
        }

      u32 data_len = ip4_reass_buffer_get_data_len (tmp);
      /* drop this fragment's IP header plus any front overlap ... */
      u32 trim_front =
        ip4_header_bytes (ip) + ip4_reass_buffer_get_data_offset (tmp);
      /* ... and whatever trails beyond the bytes we keep */
      u32 trim_end =
        vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len;
      if (tmp_bi == reass->first_bi)
        {
          /* first buffer - keep ip4 header */
          if (0 != ip4_reass_buffer_get_data_offset (tmp))
            {
              return IP4_REASS_RC_INTERNAL_ERROR;
            }
          trim_front = 0;
          trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len -
            ip4_header_bytes (ip);
          if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0))
            {
              return IP4_REASS_RC_INTERNAL_ERROR;
            }
        }
      u32 keep_data =
        vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
      /* walk this range's vlib chain, trimming and relinking */
      while (1)
        {
          ++buf_cnt;
          if (trim_front)
            {
              if (trim_front > tmp->current_length)
                {
                  /* drop whole buffer */
                  vlib_buffer_free_one (vm, tmp_bi);
                  trim_front -= tmp->current_length;
                  if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
                    {
                      return IP4_REASS_RC_INTERNAL_ERROR;
                    }
                  tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
                  tmp_bi = tmp->next_buffer;
                  tmp = vlib_get_buffer (vm, tmp_bi);
                  continue;
                }
              else
                {
                  vlib_buffer_advance (tmp, trim_front);
                  trim_front = 0;
                }
            }
          if (keep_data)
            {
              /* append this buffer to the reassembled chain */
              if (last_b)
                {
                  last_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
                  last_b->next_buffer = tmp_bi;
                }
              last_b = tmp;
              if (keep_data <= tmp->current_length)
                {
                  /* last buffer of this range - clip trailing bytes */
                  tmp->current_length = keep_data;
                  keep_data = 0;
                }
              else
                {
                  keep_data -= tmp->current_length;
                  if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
                    {
                      return IP4_REASS_RC_INTERNAL_ERROR;
                    }
                }
              total_length += tmp->current_length;
            }
          else
            {
              /* nothing left to keep from this range - free trailing buffer */
              vlib_buffer_free_one (vm, tmp_bi);
              if (reass->first_bi == tmp_bi)
                {
                  return IP4_REASS_RC_INTERNAL_ERROR;
                }
            }
          if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
            {
              tmp_bi = tmp->next_buffer;
              tmp = vlib_get_buffer (vm, tmp->next_buffer);
            }
          else
            {
              break;
            }
        }
      sub_chain_bi =
        vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip.
        reass.next_range_bi;
    }
  while (~0 != sub_chain_bi);

  if (!last_b)
    {
      return IP4_REASS_RC_INTERNAL_ERROR;
    }
  last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
  if (total_length < first_b->current_length)
    {
      return IP4_REASS_RC_INTERNAL_ERROR;
    }
  /* total_length counted the first buffer too; vlib wants it excluded */
  total_length -= first_b->current_length;
  first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
  first_b->total_length_not_including_first_buffer = total_length;
  /* rewrite the surviving IP header as an unfragmented datagram */
  ip4_header_t *ip = vlib_buffer_get_current (first_b);
  ip->flags_and_fragment_offset = 0;
  ip->length = clib_host_to_net_u16 (first_b->current_length + total_length);
  ip->checksum = ip4_header_checksum (ip);
  if (!vlib_buffer_chain_linearize (vm, first_b))
    {
      return IP4_REASS_RC_NO_BUF;
    }

  if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
    {
      ip4_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0);
#if 0
      // following code does a hexdump of packet fragments to stdout ...
      do
        {
          u32 bi = reass->first_bi;
          u8 *s = NULL;
          while (~0 != bi)
            {
              vlib_buffer_t *b = vlib_get_buffer (vm, bi);
              s = format (s, "%u: %U\n", bi, format_hexdump,
                          vlib_buffer_get_current (b), b->current_length);
              if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
                {
                  bi = b->next_buffer;
                }
              else
                {
                  break;
                }
            }
          printf ("%.*s\n", vec_len (s), s);
          fflush (stdout);
          vec_free (s);
        }
      while (0);
#endif
    }
  *bi0 = reass->first_bi;
  if (is_feature)
    {
      *next0 = IP4_REASSEMBLY_NEXT_INPUT;
    }
  else
    {
      /* custom-app path: honor the next_index stored by ip4_reass_update */
      *next0 = reass->next_index;
    }
  vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
  *error0 = IP4_ERROR_NONE;
  ip4_reass_free (rm, rt, reass);
  reass = NULL;
  return IP4_REASS_RC_OK;
}
568
569 always_inline ip4_reass_rc_t
570 ip4_reass_insert_range_in_chain (vlib_main_t * vm,
571                                  ip4_reass_main_t * rm,
572                                  ip4_reass_per_thread_t * rt,
573                                  ip4_reass_t * reass,
574                                  u32 prev_range_bi, u32 new_next_bi)
575 {
576   vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi);
577   vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b);
578   if (~0 != prev_range_bi)
579     {
580       vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
581       vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
582       new_next_vnb->ip.reass.next_range_bi = prev_vnb->ip.reass.next_range_bi;
583       prev_vnb->ip.reass.next_range_bi = new_next_bi;
584     }
585   else
586     {
587       if (~0 != reass->first_bi)
588         {
589           new_next_vnb->ip.reass.next_range_bi = reass->first_bi;
590         }
591       reass->first_bi = new_next_bi;
592     }
593   vnet_buffer_opaque_t *vnb = vnet_buffer (new_next_b);
594   if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
595       !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
596     {
597       return IP4_REASS_RC_INTERNAL_ERROR;
598     }
599   reass->data_len += ip4_reass_buffer_get_data_len (new_next_b);
600   return IP4_REASS_RC_OK;
601 }
602
/**
 * @brief Unlink the range headed by @c discard_bi from the reassembly's
 * range list, shrink the collected-data counter accordingly, and free
 * every buffer chained to the range head.
 *
 * @param prev_range_bi range preceding the victim, or ~0 when the victim
 *        is the list head.
 * @return IP4_REASS_RC_INTERNAL_ERROR when the list is inconsistent or
 *         the range bounds are invalid, else IP4_REASS_RC_OK.
 */
always_inline ip4_reass_rc_t
ip4_reass_remove_range_from_chain (vlib_main_t * vm,
                                   vlib_node_runtime_t * node,
                                   ip4_reass_main_t * rm,
                                   ip4_reass_t * reass, u32 prev_range_bi,
                                   u32 discard_bi)
{
  vlib_buffer_t *discard_b = vlib_get_buffer (vm, discard_bi);
  vnet_buffer_opaque_t *discard_vnb = vnet_buffer (discard_b);
  if (~0 != prev_range_bi)
    {
      vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
      vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
      if (!(prev_vnb->ip.reass.next_range_bi == discard_bi))
        {
          return IP4_REASS_RC_INTERNAL_ERROR;
        }
      prev_vnb->ip.reass.next_range_bi = discard_vnb->ip.reass.next_range_bi;
    }
  else
    {
      /* victim was the head of the list */
      reass->first_bi = discard_vnb->ip.reass.next_range_bi;
    }
  vnet_buffer_opaque_t *vnb = vnet_buffer (discard_b);
  if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
      !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
    {
      return IP4_REASS_RC_INTERNAL_ERROR;
    }
  reass->data_len -= ip4_reass_buffer_get_data_len (discard_b);
  /* free the whole vlib buffer chain of the discarded range */
  while (1)
    {
      vlib_buffer_free_one (vm, discard_bi);
      /* NOTE(review): buffer metadata is read after vlib_buffer_free_one —
         presumably valid until the buffer is reallocated; confirm against
         vlib buffer lifecycle rules */
      if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
        {
          ip4_reass_add_trace (vm, node, rm, reass, discard_bi, RANGE_DISCARD,
                               0);
        }
      if (discard_b->flags & VLIB_BUFFER_NEXT_PRESENT)
        {
          discard_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
          discard_bi = discard_b->next_buffer;
          discard_b = vlib_get_buffer (vm, discard_bi);
        }
      else
        {
          break;
        }
    }
  return IP4_REASS_RC_OK;
}
654
/**
 * @brief Feed one fragment (buffer *bi0) into the reassembly @c reass.
 *
 * Inserts the fragment into the sorted range list, resolving overlaps by
 * shrinking the new fragment, shrinking an existing range, or discarding
 * an existing range; then finalizes the packet if all bytes up to the
 * last-fragment octet have been collected.
 *
 * On return: *bi0 == ~0 means the buffer was consumed by the reassembly;
 * otherwise *next0/*error0 are set (drop on duplicate, or the finalized
 * packet's next node on completion).
 */
always_inline ip4_reass_rc_t
ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
                  ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
                  ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
                  bool is_feature)
{
  ip4_reass_rc_t rc = IP4_REASS_RC_OK;
  int consumed = 0;
  vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
  ip4_header_t *fip = vlib_buffer_get_current (fb);
  vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
  reass->next_index = fvnb->ip.reass.next_index;        // store next_index before it's overwritten
  /* fragment geometry in payload-byte offsets */
  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
  const u32 fragment_length =
    clib_net_to_host_u16 (fip->length) - ip4_header_bytes (fip);
  const u32 fragment_last = fragment_first + fragment_length - 1;
  fvnb->ip.reass.fragment_first = fragment_first;
  fvnb->ip.reass.fragment_last = fragment_last;
  int more_fragments = ip4_get_fragment_more (fip);
  u32 candidate_range_bi = reass->first_bi;
  u32 prev_range_bi = ~0;
  /* initially the new range covers the whole fragment */
  fvnb->ip.reass.range_first = fragment_first;
  fvnb->ip.reass.range_last = fragment_last;
  fvnb->ip.reass.next_range_bi = ~0;
  if (!more_fragments)
    {
      /* last fragment pins the total packet length */
      reass->last_packet_octet = fragment_last;
    }
  if (~0 == reass->first_bi)
    {
      // starting a new reassembly
      rc =
        ip4_reass_insert_range_in_chain (vm, rm, rt, reass, prev_range_bi,
                                         *bi0);
      if (IP4_REASS_RC_OK != rc)
        {
          return rc;
        }
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
        {
          ip4_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0);
        }
      *bi0 = ~0;
      reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
      return IP4_REASS_RC_OK;
    }
  reass->min_fragment_length = clib_min (clib_net_to_host_u16 (fip->length),
                                         fvnb->ip.reass.estimated_mtu);
  /* scan the sorted range list for the new fragment's slot */
  while (~0 != candidate_range_bi)
    {
      vlib_buffer_t *candidate_b = vlib_get_buffer (vm, candidate_range_bi);
      vnet_buffer_opaque_t *candidate_vnb = vnet_buffer (candidate_b);
      if (fragment_first > candidate_vnb->ip.reass.range_last)
        {
          // this fragments starts after candidate range
          prev_range_bi = candidate_range_bi;
          candidate_range_bi = candidate_vnb->ip.reass.next_range_bi;
          if (candidate_vnb->ip.reass.range_last < fragment_last &&
              ~0 == candidate_range_bi)
            {
              // special case - this fragment falls beyond all known ranges
              rc =
                ip4_reass_insert_range_in_chain (vm, rm, rt, reass,
                                                 prev_range_bi, *bi0);
              if (IP4_REASS_RC_OK != rc)
                {
                  return rc;
                }
              consumed = 1;
              break;
            }
          continue;
        }
      if (fragment_last < candidate_vnb->ip.reass.range_first)
        {
          // this fragment ends before candidate range without any overlap
          rc =
            ip4_reass_insert_range_in_chain (vm, rm, rt, reass, prev_range_bi,
                                             *bi0);
          if (IP4_REASS_RC_OK != rc)
            {
              return rc;
            }
          consumed = 1;
        }
      else
        {
          /* some overlap with the candidate range */
          if (fragment_first >= candidate_vnb->ip.reass.range_first &&
              fragment_last <= candidate_vnb->ip.reass.range_last)
            {
              // this fragment is a (sub)part of existing range, ignore it
              if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
                {
                  ip4_reass_add_trace (vm, node, rm, reass, *bi0,
                                       RANGE_OVERLAP, 0);
                }
              break;
            }
          int discard_candidate = 0;
          if (fragment_first < candidate_vnb->ip.reass.range_first)
            {
              /* new fragment starts before the candidate; front overlap */
              u32 overlap =
                fragment_last - candidate_vnb->ip.reass.range_first + 1;
              if (overlap < ip4_reass_buffer_get_data_len (candidate_b))
                {
                  /* shrink the candidate's front, insert new range before it */
                  candidate_vnb->ip.reass.range_first += overlap;
                  if (reass->data_len < overlap)
                    {
                      return IP4_REASS_RC_INTERNAL_ERROR;
                    }
                  reass->data_len -= overlap;
                  if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
                    {
                      ip4_reass_add_trace (vm, node, rm, reass,
                                           candidate_range_bi, RANGE_SHRINK,
                                           overlap);
                    }
                  rc =
                    ip4_reass_insert_range_in_chain (vm, rm, rt, reass,
                                                     prev_range_bi, *bi0);
                  if (IP4_REASS_RC_OK != rc)
                    {
                      return rc;
                    }
                  consumed = 1;
                }
              else
                {
                  /* new fragment fully covers the candidate */
                  discard_candidate = 1;
                }
            }
          else if (fragment_last > candidate_vnb->ip.reass.range_last)
            {
              /* new fragment extends past the candidate; trim our own front */
              u32 overlap =
                candidate_vnb->ip.reass.range_last - fragment_first + 1;
              if (overlap < ip4_reass_buffer_get_data_len (candidate_b))
                {
                  fvnb->ip.reass.range_first += overlap;
                  if (~0 != candidate_vnb->ip.reass.next_range_bi)
                    {
                      prev_range_bi = candidate_range_bi;
                      candidate_range_bi =
                        candidate_vnb->ip.reass.next_range_bi;
                      continue;
                    }
                  else
                    {
                      // special case - last range discarded
                      rc =
                        ip4_reass_insert_range_in_chain (vm, rm, rt, reass,
                                                         candidate_range_bi,
                                                         *bi0);
                      if (IP4_REASS_RC_OK != rc)
                        {
                          return rc;
                        }
                      consumed = 1;
                    }
                }
              else
                {
                  discard_candidate = 1;
                }
            }
          else
            {
              discard_candidate = 1;
            }
          if (discard_candidate)
            {
              u32 next_range_bi = candidate_vnb->ip.reass.next_range_bi;
              // discard candidate range, probe next range
              rc =
                ip4_reass_remove_range_from_chain (vm, node, rm, reass,
                                                   prev_range_bi,
                                                   candidate_range_bi);
              if (IP4_REASS_RC_OK != rc)
                {
                  return rc;
                }
              if (~0 != next_range_bi)
                {
                  candidate_range_bi = next_range_bi;
                  continue;
                }
              else
                {
                  // special case - last range discarded
                  rc =
                    ip4_reass_insert_range_in_chain (vm, rm, rt, reass,
                                                     prev_range_bi, *bi0);
                  if (IP4_REASS_RC_OK != rc)
                    {
                      return rc;
                    }
                  consumed = 1;
                }
            }
        }
      break;
    }
  if (consumed)
    {
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
        {
          ip4_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0);
        }
    }
  /* all bytes up to the known packet end collected? then finalize */
  if (~0 != reass->last_packet_octet &&
      reass->data_len == reass->last_packet_octet + 1)
    {
      return ip4_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
                                 is_feature);
    }
  else
    {
      if (consumed)
        {
          *bi0 = ~0;
        }
      else
        {
          /* fully-overlapped fragment: drop as duplicate */
          *next0 = IP4_REASSEMBLY_NEXT_DROP;
          *error0 = IP4_ERROR_REASS_DUPLICATE_FRAGMENT;
        }
    }
  return rc;
}
883
/**
 * @brief Main reassembly dispatch loop shared by both graph nodes.
 *
 * Walks the incoming frame one buffer at a time.  Non-fragmented packets
 * pass straight through; fragments are matched against (or create) a
 * reassembly context keyed on rx sw_if_index, src/dst address, fragment id
 * and protocol.  ip4_reass_update() either completes the reassembly
 * (replacing *bi0 with the head of the reassembled chain), consumes the
 * fragment (*bi0 becomes ~0, nothing is enqueued for that slot), or flags
 * a drop/error.
 *
 * @param vm         vlib main
 * @param node       this node's runtime
 * @param frame      frame of buffer indices to process
 * @param is_feature true when running on the ip4-unicast feature arc (next
 *                   node chosen via vnet_feature_next), false for the
 *                   standalone node (next index carried in buffer opaque)
 * @return number of vectors processed (frame->n_vectors)
 */
always_inline uword
ip4_reassembly_inline (vlib_main_t * vm,
		       vlib_node_runtime_t * node,
		       vlib_frame_t * frame, bool is_feature)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  ip4_reass_main_t *rm = &ip4_reass_main;
  ip4_reass_per_thread_t *rt = &rm->per_thread_data[os_get_thread_index ()];
  /* per-thread lock, also taken by the expiration walk process; held for
     the whole frame */
  clib_spinlock_lock (&rt->lock);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u32 error0 = IP4_ERROR_NONE;

	  bi0 = from[0];
	  b0 = vlib_get_buffer (vm, bi0);

	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	    {
	      // this is a whole packet - no fragmentation
	      if (is_feature)
		{
		  next0 = IP4_REASSEMBLY_NEXT_INPUT;
		}
	      else
		{
		  /* next index was stashed by the caller in buffer opaque */
		  next0 = vnet_buffer (b0)->ip.reass.next_index;
		}
	    }
	  else
	    {
	      const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
	      const u32 fragment_length =
		clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
	      const u32 fragment_last = fragment_first + fragment_length - 1;
	      /* malformed-fragment checks: zero-length fragment, reassembled
	         size overflowing the 16-bit total-length field (20 = minimum
	         IP header), or a non-last fragment below the 8-byte minimum */
	      if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
		{
		  next0 = IP4_REASSEMBLY_NEXT_DROP;
		  error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
		}
	      else
		{
		  /* hash key: word 0 = rx sw_if_index | src address,
		     word 1 = dst address | fragment id | protocol */
		  ip4_reass_key_t k;
		  k.as_u64[0] =
		    (u64) vnet_buffer (b0)->sw_if_index[VLIB_RX] |
		    (u64) ip0->src_address.as_u32 << 32;
		  k.as_u64[1] =
		    (u64) ip0->dst_address.as_u32 |
		    (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;

		  ip4_reass_t *reass =
		    ip4_reass_find_or_create (vm, rm, rt, &k);

		  if (reass)
		    {
		      switch (ip4_reass_update
			      (vm, node, rm, rt, reass, &bi0, &next0, &error0,
			       is_feature))
			{
			case IP4_REASS_RC_OK:
			  /* nothing to do here */
			  break;
			case IP4_REASS_RC_NO_BUF:
			  /* fallthrough */
			case IP4_REASS_RC_INTERNAL_ERROR:
			  /* drop everything and start with a clean slate */
			  ip4_reass_on_timeout (vm, rm, reass);
			  ip4_reass_free (rm, rt, reass);
			  goto next_packet;
			  break;
			}
		    }
		  else
		    {
		      /* per-thread reassembly context pool is exhausted */
		      next0 = IP4_REASSEMBLY_NEXT_DROP;
		      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
		    }
		}

	      b0->error = node->errors[error0];
	    }

	  /* bi0 == ~0 means the fragment was absorbed into a pending
	     reassembly - nothing to enqueue for this slot */
	  if (bi0 != ~0)
	    {
	      to_next[0] = bi0;
	      to_next += 1;
	      n_left_to_next -= 1;
	      if (is_feature && IP4_ERROR_NONE == error0)
		{
		  b0 = vlib_get_buffer (vm, bi0);
		  vnet_feature_next (&next0, b0);
		}
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					       n_left_to_next, bi0, next0);
	      IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
	    }

	next_packet:
	  from += 1;
	  n_left_from -= 1;
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  clib_spinlock_unlock (&rt->lock);
  return frame->n_vectors;
}
1003
/* error counter strings for the reassembly nodes, expanded from the
 * foreach_ip4_error list (indexed by IP4_ERROR_* values) */
static char *ip4_reassembly_error_strings[] = {
#define _(sym, string) string,
  foreach_ip4_error
#undef _
};
1009
/**
 * @brief Node function for the standalone "ip4-reassembly" graph node.
 */
static uword
ip4_reassembly (vlib_main_t * vm, vlib_node_runtime_t * node,
		vlib_frame_t * frame)
{
  return ip4_reassembly_inline (vm, node, frame, false /* is_feature */ );
}
1016
/* graph node registration for the standalone reassembly path */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_reass_node, static) = {
    .function = ip4_reassembly,
    .name = "ip4-reassembly",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_reass_trace,
    .n_errors = ARRAY_LEN (ip4_reassembly_error_strings),
    .error_strings = ip4_reassembly_error_strings,
    .n_next_nodes = IP4_REASSEMBLY_N_NEXT,
    .next_nodes =
        {
                [IP4_REASSEMBLY_NEXT_INPUT] = "ip4-input",
                [IP4_REASSEMBLY_NEXT_DROP] = "ip4-drop",
        },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip4_reass_node, ip4_reassembly);
1035
/**
 * @brief Node function for the "ip4-reassembly-feature" graph node
 *        (ip4-unicast feature arc variant).
 */
static uword
ip4_reassembly_feature (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return ip4_reassembly_inline (vm, node, frame, true /* is_feature */ );
}
1042
/* graph node registration for the feature-arc reassembly path */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_reass_node_feature, static) = {
    .function = ip4_reassembly_feature,
    .name = "ip4-reassembly-feature",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_reass_trace,
    .n_errors = ARRAY_LEN (ip4_reassembly_error_strings),
    .error_strings = ip4_reassembly_error_strings,
    .n_next_nodes = IP4_REASSEMBLY_N_NEXT,
    .next_nodes =
        {
                [IP4_REASSEMBLY_NEXT_INPUT] = "ip4-input",
                [IP4_REASSEMBLY_NEXT_DROP] = "ip4-drop",
        },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip4_reass_node_feature, ip4_reassembly_feature);
1061
/* hook the feature node into the ip4-unicast arc, before ip4-lookup */
/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_reassembly_feature, static) = {
    .arc_name = "ip4-unicast",
    .node_name = "ip4-reassembly-feature",
    .runs_before = VNET_FEATURES ("ip4-lookup"),
    .runs_after = 0,
};
/* *INDENT-ON* */
1070
1071 always_inline u32
1072 ip4_reass_get_nbuckets ()
1073 {
1074   ip4_reass_main_t *rm = &ip4_reass_main;
1075   u32 nbuckets;
1076   u8 i;
1077
1078   nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR);
1079
1080   for (i = 0; i < 31; i++)
1081     if ((1 << i) >= nbuckets)
1082       break;
1083   nbuckets = 1 << i;
1084
1085   return nbuckets;
1086 }
1087
/** Events signalled to the expiration walk process. */
typedef enum
{
  IP4_EVENT_CONFIG_CHANGED = 1,	/**< sent by ip4_reass_set() on reconfig */
} ip4_reass_event_t;
1092
/** Context handed to ip4_rehash_cb while rebuilding the hash table. */
typedef struct
{
  int failure;			/**< set to 1 if any insert into new_hash failed */
  clib_bihash_16_8_t *new_hash;	/**< replacement hash table being populated */
} ip4_rehash_cb_ctx;
1098
1099 static void
1100 ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
1101 {
1102   ip4_rehash_cb_ctx *ctx = _ctx;
1103   if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
1104     {
1105       ctx->failure = 1;
1106     }
1107 }
1108
1109 static void
1110 ip4_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1111                       u32 expire_walk_interval_ms)
1112 {
1113   ip4_reass_main.timeout_ms = timeout_ms;
1114   ip4_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1115   ip4_reass_main.max_reass_n = max_reassemblies;
1116   ip4_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
1117 }
1118
/**
 * @brief Apply a new reassembly configuration.
 *
 * Stores the parameters, wakes the expiration walk process so it notices
 * the change, and - if the configured maximum grew enough to need more
 * hash buckets - rebuilds the bihash by copying every entry into a larger
 * table.  The table is only ever grown, never shrunk.
 *
 * @return 0 on success, -1 if populating the new hash table failed (the
 *         old table is left in place)
 */
vnet_api_error_t
ip4_reass_set (u32 timeout_ms, u32 max_reassemblies,
	       u32 expire_walk_interval_ms)
{
  u32 old_nbuckets = ip4_reass_get_nbuckets ();
  ip4_reass_set_params (timeout_ms, max_reassemblies,
			expire_walk_interval_ms);
  /* nudge the expire-walk process so a new interval takes effect promptly */
  vlib_process_signal_event (ip4_reass_main.vlib_main,
			     ip4_reass_main.ip4_reass_expire_node_idx,
			     IP4_EVENT_CONFIG_CHANGED, 0);
  u32 new_nbuckets = ip4_reass_get_nbuckets ();
  if (ip4_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
    {
      clib_bihash_16_8_t new_hash;
      clib_memset (&new_hash, 0, sizeof (new_hash));
      ip4_rehash_cb_ctx ctx;
      ctx.failure = 0;
      ctx.new_hash = &new_hash;
      clib_bihash_init_16_8 (&new_hash, "ip4-reass", new_nbuckets,
			     new_nbuckets * 1024);
      /* copy every existing entry into the bigger table */
      clib_bihash_foreach_key_value_pair_16_8 (&ip4_reass_main.hash,
					       ip4_rehash_cb, &ctx);
      if (ctx.failure)
	{
	  /* keep the old table intact on failure */
	  clib_bihash_free_16_8 (&new_hash);
	  return -1;
	}
      else
	{
	  /* swap in the new table by structure copy */
	  clib_bihash_free_16_8 (&ip4_reass_main.hash);
	  clib_memcpy_fast (&ip4_reass_main.hash, &new_hash,
			    sizeof (ip4_reass_main.hash));
	}
    }
  return 0;
}
1155
1156 vnet_api_error_t
1157 ip4_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1158                u32 * expire_walk_interval_ms)
1159 {
1160   *timeout_ms = ip4_reass_main.timeout_ms;
1161   *max_reassemblies = ip4_reass_main.max_reass_n;
1162   *expire_walk_interval_ms = ip4_reass_main.expire_walk_interval_ms;
1163   return 0;
1164 }
1165
/**
 * @brief One-time module init.
 *
 * Allocates per-thread reassembly pools and locks, applies default
 * parameters, creates the hash table, and caches the node indices of the
 * expiration walk process and of ip4-drop.
 *
 * @return 0 on success (no failure paths in this version)
 */
static clib_error_t *
ip4_reass_init_function (vlib_main_t * vm)
{
  ip4_reass_main_t *rm = &ip4_reass_main;
  clib_error_t *error = 0;
  u32 nbuckets;
  vlib_node_t *node;

  rm->vlib_main = vm;
  rm->vnet_main = vnet_get_main ();

  /* one entry per worker plus the main thread */
  vec_validate (rm->per_thread_data, vlib_num_workers ());
  ip4_reass_per_thread_t *rt;
  vec_foreach (rt, rm->per_thread_data)
  {
    clib_spinlock_init (&rt->lock);
    pool_alloc (rt->pool, rm->max_reass_n);
  }

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-reassembly-expire-walk");
  ASSERT (node);
  rm->ip4_reass_expire_node_idx = node->index;

  ip4_reass_set_params (IP4_REASS_TIMEOUT_DEFAULT_MS,
			IP4_REASS_MAX_REASSEMBLIES_DEFAULT,
			IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);

  nbuckets = ip4_reass_get_nbuckets ();
  clib_bihash_init_16_8 (&rm->hash, "ip4-reass", nbuckets, nbuckets * 1024);

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
  ASSERT (node);
  rm->ip4_drop_idx = node->index;

  return error;
}

VLIB_INIT_FUNCTION (ip4_reass_init_function);
1204
/**
 * @brief Process node that expires stale reassembly contexts.
 *
 * Sleeps for expire_walk_interval_ms (or until a config-changed event
 * arrives), then scans every per-thread pool under its spinlock and frees
 * any reassembly not heard from within the configured timeout, handing its
 * buffers to the timeout path (ip4_reass_on_timeout).
 *
 * Loops forever; the trailing return is never reached in normal operation.
 */
static uword
ip4_reass_walk_expired (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * f)
{
  ip4_reass_main_t *rm = &ip4_reass_main;
  uword event_type, *event_data = 0;

  while (true)
    {
      vlib_process_wait_for_event_or_clock (vm,
					    (f64) rm->expire_walk_interval_ms
					    / (f64) MSEC_PER_SEC);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
	{
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	  break;
	case IP4_EVENT_CONFIG_CHANGED:
	  /* wake-up only; the new interval is used on the next wait above */
	  break;
	default:
	  clib_warning ("BUG: event type 0x%wx", event_type);
	  break;
	}
      f64 now = vlib_time_now (vm);

      ip4_reass_t *reass;
      int *pool_indexes_to_free = NULL;

      uword thread_index = 0;
      int index;
      const uword nthreads = vlib_num_workers () + 1;
      for (thread_index = 0; thread_index < nthreads; ++thread_index)
	{
	  ip4_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
	  clib_spinlock_lock (&rt->lock);

	  /* collect first, free second - freeing while iterating the pool
	     would invalidate pool_foreach_index */
	  vec_reset_length (pool_indexes_to_free);
	  /* *INDENT-OFF* */
	  pool_foreach_index (index, rt->pool, ({
				reass = pool_elt_at_index (rt->pool, index);
				if (now > reass->last_heard + rm->timeout)
				  {
				    vec_add1 (pool_indexes_to_free, index);
				  }
			      }));
	  /* *INDENT-ON* */
	  int *i;
	  /* *INDENT-OFF* */
	  vec_foreach (i, pool_indexes_to_free)
	  {
	    ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
	    ip4_reass_on_timeout (vm, rm, reass);
	    ip4_reass_free (rm, rt, reass);
	  }
	  /* *INDENT-ON* */

	  clib_spinlock_unlock (&rt->lock);
	}

      vec_free (pool_indexes_to_free);
      if (event_data)
	{
	  /* reuse the event vector on the next iteration */
	  _vec_len (event_data) = 0;
	}
    }

  return 0;
}
1275
/* registration of the expiration walk as a process node */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_reass_expire_node, static) = {
    .function = ip4_reass_walk_expired,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "ip4-reassembly-expire-walk",
    .format_trace = format_ip4_reass_trace,
    .n_errors = ARRAY_LEN (ip4_reassembly_error_strings),
    .error_strings = ip4_reassembly_error_strings,

};
/* *INDENT-ON* */
1287
/**
 * @brief Format a reassembly hash key for CLI/trace output.
 *
 * frag_id is copied into the key straight from the header (network byte
 * order), hence the byte swap for display; the addresses are printed via
 * format_ip4_address as stored.
 */
static u8 *
format_ip4_reass_key (u8 * s, va_list * args)
{
  ip4_reass_key_t *key = va_arg (*args, ip4_reass_key_t *);
  s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
	      key->xx_id, format_ip4_address, &key->src, format_ip4_address,
	      &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
  return s;
}
1297
1298 static u8 *
1299 format_ip4_reass (u8 * s, va_list * args)
1300 {
1301   vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1302   ip4_reass_t *reass = va_arg (*args, ip4_reass_t *);
1303
1304   s = format (s, "ID: %lu, key: %U\n  first_bi: %u, data_len: %u, "
1305               "last_packet_octet: %u, trace_op_counter: %u\n",
1306               reass->id, format_ip4_reass_key, &reass->key, reass->first_bi,
1307               reass->data_len, reass->last_packet_octet,
1308               reass->trace_op_counter);
1309   u32 bi = reass->first_bi;
1310   u32 counter = 0;
1311   while (~0 != bi)
1312     {
1313       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1314       vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1315       s = format (s, "  #%03u: range: [%u, %u], bi: %u, off: %d, len: %u, "
1316                   "fragment[%u, %u]\n",
1317                   counter, vnb->ip.reass.range_first,
1318                   vnb->ip.reass.range_last, bi,
1319                   ip4_reass_buffer_get_data_offset (b),
1320                   ip4_reass_buffer_get_data_len (b),
1321                   vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last);
1322       if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
1323         {
1324           bi = b->next_buffer;
1325         }
1326       else
1327         {
1328           bi = ~0;
1329         }
1330     }
1331   return s;
1332 }
1333
/**
 * @brief CLI handler for "show ip4-reassembly [details]".
 *
 * Prints the total number of in-flight reassemblies summed over all
 * threads; with "details", also dumps every context and its ranges.
 */
static clib_error_t *
show_ip4_reass (vlib_main_t * vm, unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
{
  ip4_reass_main_t *rm = &ip4_reass_main;

  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))
    {
      details = true;
    }

  u32 sum_reass_n = 0;
  ip4_reass_t *reass;
  uword thread_index;
  /* workers plus the main thread */
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
    {
      ip4_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      /* lock out the datapath and the expire walk while reading the pool */
      clib_spinlock_lock (&rt->lock);
      if (details)
	{
	  /* *INDENT-OFF* */
	  pool_foreach (reass, rt->pool, {
	    vlib_cli_output (vm, "%U", format_ip4_reass, vm, reass);
	  });
	  /* *INDENT-ON* */
	}
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
    }
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);
  return 0;
}
1376
/* CLI command registration for the status dump above */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_reassembly_cmd, static) = {
    .path = "show ip4-reassembly",
    .short_help = "show ip4-reassembly [details]",
    .function = show_ip4_reass,
};
/* *INDENT-ON* */
1384
/**
 * @brief Enable or disable the reassembly feature on an interface.
 *
 * @param sw_if_index    interface to configure
 * @param enable_disable non-zero to enable, 0 to disable
 * @return result of vnet_feature_enable_disable()
 */
vnet_api_error_t
ip4_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
  return vnet_feature_enable_disable ("ip4-unicast", "ip4-reassembly-feature",
				      sw_if_index, enable_disable, 0, 0);
}
1391
1392 /*
1393  * fd.io coding-style-patch-verification: ON
1394  *
1395  * Local Variables:
1396  * eval: (c-set-style "gnu")
1397  * End:
1398  */