ip: reassembly - adding custom reassembly node
vpp.git: src/vnet/ip/reass/ip6_full_reass.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 /**
17  * @file
18  * @brief IPv6 Full Reassembly.
19  *
20  * This file contains the source code for IPv6 full reassembly.
21  */
22
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vppinfra/bihash_48_8.h>
27 #include <vnet/ip/reass/ip6_full_reass.h>
28 #include <vnet/ip/ip6_inlines.h>
29
30 #define MSEC_PER_SEC 1000
31 #define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
32 #define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000    // 10 seconds default
33 #define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024
34 #define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
35 #define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75)
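/*
 * Default tuning: reassemblies time out after 100 ms, the expiry walk runs
 * every 10 s, at most 1024 reassembly contexts are kept per worker thread,
 * and a single reassembly accepts at most 3 fragments before it is dropped
 * as too long. All of these can be changed at runtime via
 * ip6_full_reass_set (), which also grows the bihash when needed.
 */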
36
37 typedef enum
38 {
39   IP6_FULL_REASS_RC_OK,
40   IP6_FULL_REASS_RC_INTERNAL_ERROR,
41   IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
42   IP6_FULL_REASS_RC_NO_BUF,
43   IP6_FULL_REASS_RC_HANDOFF,
44   IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
45   IP6_FULL_REASS_RC_OVERLAP,
46 } ip6_full_reass_rc_t;
47
48 typedef struct
49 {
50   union
51   {
52     struct
53     {
54       ip6_address_t src;
55       ip6_address_t dst;
56       u32 xx_id;
57       u32 frag_id;
58       u8 unused[7];
59       u8 proto;
60     };
61     u64 as_u64[6];
62   };
63 } ip6_full_reass_key_t;
64
65 typedef union
66 {
67   struct
68   {
69     u32 reass_index;
70     u32 memory_owner_thread_index;
71   };
72   u64 as_u64;
73 } ip6_full_reass_val_t;
74
75 typedef union
76 {
77   struct
78   {
79     ip6_full_reass_key_t k;
80     ip6_full_reass_val_t v;
81   };
82   clib_bihash_kv_48_8_t kv;
83 } ip6_full_reass_kv_t;
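/*
 * The lookup hash uses 48-byte keys with 8-byte values.
 * ip6_full_reass_key_t packs source address, destination address, fragment
 * identification, FIB index and protocol into exactly six u64 words (see how
 * kv.k.as_u64[0..5] is filled in ip6_full_reassembly_inline ());
 * ip6_full_reass_val_t carries the pool index of the reassembly context
 * together with the thread owning it, which is what makes cross-thread
 * handoff decisions possible.
 */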
84
85
86 always_inline u32
87 ip6_full_reass_buffer_get_data_offset (vlib_buffer_t * b)
88 {
89   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
90   return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first;
91 }
92
93 always_inline u16
94 ip6_full_reass_buffer_get_data_len (vlib_buffer_t * b)
95 {
96   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
97   return clib_min (vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) -
98     (vnb->ip.reass.fragment_first +
99      ip6_full_reass_buffer_get_data_offset (b)) + 1;
100 }
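/*
 * Worked example of the arithmetic above (purely illustrative): a fragment
 * carrying payload bytes [1400, 2799] has fragment_first = 1400 and
 * fragment_last = 2799. With range_first = 1600 and range_last = 2799 the
 * data offset is 1600 - 1400 = 200 and the data length is
 * min (2799, 2799) - (1400 + 200) + 1 = 1200 bytes. Because overlapping
 * IPv6 fragments are rejected outright (see ip6_full_reass_update ()), the
 * range in practice covers the whole fragment and the offset is 0.
 */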
101
102 typedef struct
103 {
104   // hash table key
105   ip6_full_reass_key_t key;
106   // time when last packet was received
107   f64 last_heard;
108   // internal id of this reassembly
109   u64 id;
110   // buffer index of first buffer in this reassembly context
111   u32 first_bi;
112   // last octet of packet, ~0 until fragment without more_fragments arrives
113   u32 last_packet_octet;
114   // length of data collected so far
115   u32 data_len;
116   // trace operation counter
117   u32 trace_op_counter;
118   // next index - used by custom apps (~0 if not set)
119   u32 next_index;
120   // error next index - used by custom apps (~0 if not set)
121   u32 error_next_index;
122   // minimum fragment length for this reassembly - used to estimate MTU
123   u16 min_fragment_length;
124   // number of fragments for this reassembly
125   u32 fragments_n;
126   // thread owning memory for this context (whose pool contains this ctx)
127   u32 memory_owner_thread_index;
128   // thread which received fragment with offset 0 and which sends out the
129   // completed reassembly
130   u32 sendout_thread_index;
131 } ip6_full_reass_t;
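/*
 * A reassembly context lives in the pool of the thread that first saw the
 * flow (memory_owner_thread_index); fragments arriving on other threads are
 * handed off to that owner. The thread that receives the fragment with
 * offset 0 is remembered as sendout_thread_index and the finished datagram
 * is handed off to it for sending. The context is freed when the datagram is
 * finalized, when it times out, or when an error such as an overlapping
 * fragment forces all collected buffers to be dropped.
 */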
132
133 typedef struct
134 {
135   ip6_full_reass_t *pool;
136   u32 reass_n;
137   u32 id_counter;
138   clib_spinlock_t lock;
139 } ip6_full_reass_per_thread_t;
140
141 typedef struct
142 {
143   // IPv6 config
144   u32 timeout_ms;
145   f64 timeout;
146   u32 expire_walk_interval_ms;
147   // maximum number of fragments in one reassembly
148   u32 max_reass_len;
149   // maximum number of reassemblies
150   u32 max_reass_n;
151
152   // IPv6 runtime
153   clib_bihash_48_8_t hash;
154
155   // per-thread data
156   ip6_full_reass_per_thread_t *per_thread_data;
157
158   // convenience
159   vlib_main_t *vlib_main;
160
161   u32 ip6_icmp_error_idx;
162   u32 ip6_full_reass_expire_node_idx;
163
164   /** Worker handoff */
165   u32 fq_index;
166   u32 fq_local_index;
167   u32 fq_feature_index;
168   u32 fq_custom_index;
169
170   // reference count for enabling/disabling feature - per interface
171   u32 *feature_use_refcount_per_intf;
172
173   // whether local fragmented packets are reassembled or not
174   int is_local_reass_enabled;
175 } ip6_full_reass_main_t;
176
177 extern ip6_full_reass_main_t ip6_full_reass_main;
178
179 #ifndef CLIB_MARCH_VARIANT
180 ip6_full_reass_main_t ip6_full_reass_main;
181 #endif /* CLIB_MARCH_VARIANT */
182
183 typedef enum
184 {
185   IP6_FULL_REASSEMBLY_NEXT_INPUT,
186   IP6_FULL_REASSEMBLY_NEXT_DROP,
187   IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR,
188   IP6_FULL_REASSEMBLY_NEXT_HANDOFF,
189   IP6_FULL_REASSEMBLY_N_NEXT,
190 } ip6_full_reass_next_t;
191
192 typedef enum
193 {
194   NORMAL,
195   FEATURE,
196   CUSTOM
197 } ip6_full_reass_node_type_t;
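/*
 * The CUSTOM flavour (node "ip6-full-reassembly-custom") lets another
 * feature choose where reassembled packets and errored fragments are
 * enqueued. A caller is expected to fill the buffer opaque before sending
 * fragments to the custom node; ip6_full_reass_update () copies these values
 * into the reassembly context. The sketch below is illustrative only - the
 * function and index names are hypothetical.
 */
#if 0
static_always_inline void
my_feature_prepare_for_custom_reass (vlib_buffer_t *b, u32 next_index,
				     u32 error_next_index)
{
  /* where the fully reassembled datagram should be enqueued */
  vnet_buffer (b)->ip.reass.next_index = next_index;
  /* where fragments go if reassembly fails or times out */
  vnet_buffer (b)->ip.reass.error_next_index = error_next_index;
}
#endif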
198
199 typedef enum
200 {
201   RANGE_NEW,
202   RANGE_OVERLAP,
203   ICMP_ERROR_RT_EXCEEDED,
204   ICMP_ERROR_FL_TOO_BIG,
205   ICMP_ERROR_FL_NOT_MULT_8,
206   FINALIZE,
207   HANDOFF,
208   PASSTHROUGH,
209 } ip6_full_reass_trace_operation_e;
210
211 typedef struct
212 {
213   u16 range_first;
214   u16 range_last;
215   u32 range_bi;
216   i32 data_offset;
217   u32 data_len;
218   u32 first_bi;
219 } ip6_full_reass_range_trace_t;
220
221 typedef struct
222 {
223   ip6_full_reass_trace_operation_e action;
224   u32 reass_id;
225   ip6_full_reass_range_trace_t trace_range;
226   u32 op_id;
227   u32 fragment_first;
228   u32 fragment_last;
229   u32 total_data_len;
230   u32 thread_id;
231   u32 thread_id_to;
232   bool is_after_handoff;
233   ip6_header_t ip6_header;
234   ip6_frag_hdr_t ip6_frag_header;
235 } ip6_full_reass_trace_t;
236
237 static void
238 ip6_full_reass_trace_details (vlib_main_t * vm, u32 bi,
239                               ip6_full_reass_range_trace_t * trace)
240 {
241   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
242   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
243   trace->range_first = vnb->ip.reass.range_first;
244   trace->range_last = vnb->ip.reass.range_last;
245   trace->data_offset = ip6_full_reass_buffer_get_data_offset (b);
246   trace->data_len = ip6_full_reass_buffer_get_data_len (b);
247   trace->range_bi = bi;
248 }
249
250 static u8 *
251 format_ip6_full_reass_range_trace (u8 * s, va_list * args)
252 {
253   ip6_full_reass_range_trace_t *trace =
254     va_arg (*args, ip6_full_reass_range_trace_t *);
255   s =
256     format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first,
257             trace->range_last, trace->data_offset, trace->data_len,
258             trace->range_bi);
259   return s;
260 }
261
262 static u8 *
263 format_ip6_full_reass_trace (u8 * s, va_list * args)
264 {
265   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
266   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
267   ip6_full_reass_trace_t *t = va_arg (*args, ip6_full_reass_trace_t *);
268   u32 indent = 0;
269   if (~0 != t->reass_id)
270     {
271       if (t->is_after_handoff)
272         {
273           s =
274             format (s, "%U\n", format_ip6_header, &t->ip6_header,
275                     sizeof (t->ip6_header));
276           s =
277             format (s, "  %U\n", format_ip6_frag_hdr, &t->ip6_frag_header,
278                     sizeof (t->ip6_frag_header));
279           indent = 2;
280         }
281       s =
282         format (s, "%Ureass id: %u, op id: %u, ", format_white_space, indent,
283                 t->reass_id, t->op_id);
284       indent = format_get_indent (s);
285       s = format (s, "first bi: %u, data len: %u, ip/fragment[%u, %u]",
286                   t->trace_range.first_bi, t->total_data_len,
287                   t->fragment_first, t->fragment_last);
288     }
289   switch (t->action)
290     {
291     case RANGE_NEW:
292       s = format (s, "\n%Unew %U", format_white_space, indent,
293                   format_ip6_full_reass_range_trace, &t->trace_range);
294       break;
295     case RANGE_OVERLAP:
296       s = format (s, "\n%Uoverlap %U", format_white_space, indent,
297                   format_ip6_full_reass_range_trace, &t->trace_range);
298       break;
299     case ICMP_ERROR_FL_TOO_BIG:
300       s = format (s, "\n%Uicmp-error - frag_len > 65535 %U",
301                   format_white_space, indent,
302                   format_ip6_full_reass_range_trace, &t->trace_range);
303       break;
304     case ICMP_ERROR_FL_NOT_MULT_8:
305       s = format (s, "\n%Uicmp-error - frag_len mod 8 != 0 %U",
306                   format_white_space, indent,
307                   format_ip6_full_reass_range_trace, &t->trace_range);
308       break;
309     case ICMP_ERROR_RT_EXCEEDED:
310       s = format (s, "\n%Uicmp-error - reassembly time exceeded",
311                   format_white_space, indent);
312       break;
313     case FINALIZE:
314       s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
315       break;
316     case HANDOFF:
317       s =
318         format (s, "handoff from thread #%u to thread #%u", t->thread_id,
319                 t->thread_id_to);
320       break;
321     case PASSTHROUGH:
322       s = format (s, "passthrough - not a fragment");
323       break;
324     }
325   return s;
326 }
327
328 static void
329 ip6_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
330                           ip6_full_reass_t * reass, u32 bi,
331                           ip6_frag_hdr_t * ip6_frag_header,
332                           ip6_full_reass_trace_operation_e action,
333                           u32 thread_id_to)
334 {
335   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
336   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
337   bool is_after_handoff = false;
338   if (pool_is_free_index
339       (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
340     {
341       // this buffer's trace is gone
342       b->flags &= ~VLIB_BUFFER_IS_TRACED;
343       return;
344     }
345   if (vlib_buffer_get_trace_thread (b) != vm->thread_index)
346     {
347       is_after_handoff = true;
348     }
349   ip6_full_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
350   t->is_after_handoff = is_after_handoff;
351   if (t->is_after_handoff)
352     {
353       clib_memcpy (&t->ip6_header, vlib_buffer_get_current (b),
354                    clib_min (sizeof (t->ip6_header), b->current_length));
355       if (ip6_frag_header)
356         {
357           clib_memcpy (&t->ip6_frag_header, ip6_frag_header,
358                        sizeof (t->ip6_frag_header));
359         }
360       else
361         {
362           clib_memset (&t->ip6_frag_header, 0, sizeof (t->ip6_frag_header));
363         }
364     }
365   if (reass)
366     {
367       t->reass_id = reass->id;
368       t->op_id = reass->trace_op_counter;
369       t->trace_range.first_bi = reass->first_bi;
370       t->total_data_len = reass->data_len;
371       ++reass->trace_op_counter;
372     }
373   else
374     {
375       t->reass_id = ~0;
376     }
377   t->action = action;
378   t->thread_id = vm->thread_index;
379   t->thread_id_to = thread_id_to;
380   ip6_full_reass_trace_details (vm, bi, &t->trace_range);
381   t->fragment_first = vnb->ip.reass.fragment_first;
382   t->fragment_last = vnb->ip.reass.fragment_last;
383 #if 0
384   static u8 *s = NULL;
385   s = format (s, "%U", format_ip6_full_reass_trace, NULL, NULL, t);
386   printf ("%.*s\n", vec_len (s), s);
387   fflush (stdout);
388   vec_reset_length (s);
389 #endif
390 }
391
392 always_inline void
393 ip6_full_reass_free_ctx (ip6_full_reass_per_thread_t * rt,
394                          ip6_full_reass_t * reass)
395 {
396   pool_put (rt->pool, reass);
397   --rt->reass_n;
398 }
399
400 always_inline void
401 ip6_full_reass_free (ip6_full_reass_main_t * rm,
402                      ip6_full_reass_per_thread_t * rt,
403                      ip6_full_reass_t * reass)
404 {
405   clib_bihash_kv_48_8_t kv;
406   kv.key[0] = reass->key.as_u64[0];
407   kv.key[1] = reass->key.as_u64[1];
408   kv.key[2] = reass->key.as_u64[2];
409   kv.key[3] = reass->key.as_u64[3];
410   kv.key[4] = reass->key.as_u64[4];
411   kv.key[5] = reass->key.as_u64[5];
412   clib_bihash_add_del_48_8 (&rm->hash, &kv, 0);
413   ip6_full_reass_free_ctx (rt, reass);
414 }
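/*
 * Freeing a context removes its key from the bihash and returns the element
 * to the per-thread pool; it deliberately does not free any vlib buffers -
 * callers still holding fragments must drop them separately via
 * ip6_full_reass_drop_all () below.
 */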
415
416 always_inline void
417 ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
418                          ip6_full_reass_t *reass, u32 offending_bi)
419 {
420   u32 range_bi = reass->first_bi;
421   vlib_buffer_t *range_b;
422   vnet_buffer_opaque_t *range_vnb;
423   u32 *to_free = NULL;
424   while (~0 != range_bi)
425     {
426       range_b = vlib_get_buffer (vm, range_bi);
427       range_vnb = vnet_buffer (range_b);
428       u32 bi = range_bi;
429       while (~0 != bi)
430         {
431           vec_add1 (to_free, bi);
432           if (bi == offending_bi)
433             {
434               offending_bi = ~0;
435             }
436           vlib_buffer_t *b = vlib_get_buffer (vm, bi);
437           if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
438             {
439               bi = b->next_buffer;
440               b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
441             }
442           else
443             {
444               bi = ~0;
445             }
446         }
447       range_bi = range_vnb->ip.reass.next_range_bi;
448     }
449   if (~0 != offending_bi)
450     {
451       vec_add1 (to_free, offending_bi);
452     }
453   /* send to error_next_index (custom apps) or free the buffers */
454   if (~0 != reass->error_next_index)
455     {
456       u32 n_left_to_next, *to_next, next_index;
457
458       next_index = reass->error_next_index;
459       u32 bi = ~0;
460
461       while (vec_len (to_free) > 0)
462         {
463           vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
464
465           while (vec_len (to_free) > 0 && n_left_to_next > 0)
466             {
467               bi = vec_pop (to_free);
468
469               if (~0 != bi)
470                 {
471                   to_next[0] = bi;
472                   to_next += 1;
473                   n_left_to_next -= 1;
474                 }
475             }
476           vlib_put_next_frame (vm, node, next_index, n_left_to_next);
477         }
478     }
479   else
480     {
481       vlib_buffer_free (vm, to_free, vec_len (to_free));
482     }
483   vec_free (to_free);
484 }
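/*
 * ip6_full_reass_drop_all () walks every range in the context and every
 * chained buffer within each range. For custom apps (error_next_index set)
 * the buffers are enqueued to the app-provided error next node instead of
 * being freed, so the owner of the fragments decides their fate.
 */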
485
486 always_inline void
487 ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
488                            ip6_full_reass_t * reass, u32 * icmp_bi)
489 {
490   if (~0 == reass->first_bi)
491     {
492       return;
493     }
494   if (~0 == reass->next_index)  // custom apps don't want icmp
495     {
496       vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
497       if (0 == vnet_buffer (b)->ip.reass.fragment_first)
498         {
499           *icmp_bi = reass->first_bi;
500           if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
501             {
502               ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
503                                         ICMP_ERROR_RT_EXCEEDED, ~0);
504             }
505           // fragment with offset zero received - send icmp message back
506           if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
507             {
508               // separate first buffer from chain and steer it towards icmp node
509               b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
510               reass->first_bi = b->next_buffer;
511             }
512           else
513             {
514               reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
515             }
516           icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
517                                        ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
518                                        0);
519         }
520     }
521   ip6_full_reass_drop_all (vm, node, reass, ~0);
522 }
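/*
 * On timeout, an ICMP time-exceeded message is generated only for the
 * non-custom path and only if the fragment with offset 0 has been received
 * (so there is a header to quote back to the sender); everything collected
 * so far is then dropped via ip6_full_reass_drop_all ().
 */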
523
524 always_inline ip6_full_reass_t *
525 ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
526                                ip6_full_reass_main_t *rm,
527                                ip6_full_reass_per_thread_t *rt,
528                                ip6_full_reass_kv_t *kv, u32 *icmp_bi,
529                                u8 *do_handoff, int skip_bihash)
530 {
531   ip6_full_reass_t *reass;
532   f64 now;
533
534 again:
535
536   reass = NULL;
537   now = vlib_time_now (vm);
538
539   if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
540     {
541       if (vm->thread_index != kv->v.memory_owner_thread_index)
542         {
543           *do_handoff = 1;
544           return NULL;
545         }
546
547       reass =
548         pool_elt_at_index (rm->per_thread_data
549                            [kv->v.memory_owner_thread_index].pool,
550                            kv->v.reass_index);
551
552       if (now > reass->last_heard + rm->timeout)
553         {
554           ip6_full_reass_on_timeout (vm, node, reass, icmp_bi);
555           ip6_full_reass_free (rm, rt, reass);
556           reass = NULL;
557         }
558     }
559
560   if (reass)
561     {
562       reass->last_heard = now;
563       return reass;
564     }
565
566   if (rt->reass_n >= rm->max_reass_n)
567     {
568       reass = NULL;
569       return reass;
570     }
571   else
572     {
573       pool_get (rt->pool, reass);
574       clib_memset (reass, 0, sizeof (*reass));
575       reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
576       ++rt->id_counter;
577       reass->first_bi = ~0;
578       reass->last_packet_octet = ~0;
579       reass->data_len = 0;
580       reass->next_index = ~0;
581       reass->error_next_index = ~0;
582       reass->memory_owner_thread_index = vm->thread_index;
583       ++rt->reass_n;
584     }
585
586   kv->v.reass_index = (reass - rt->pool);
587   kv->v.memory_owner_thread_index = vm->thread_index;
588   reass->last_heard = now;
589
590   if (!skip_bihash)
591     {
592       reass->key.as_u64[0] = kv->kv.key[0];
593       reass->key.as_u64[1] = kv->kv.key[1];
594       reass->key.as_u64[2] = kv->kv.key[2];
595       reass->key.as_u64[3] = kv->kv.key[3];
596       reass->key.as_u64[4] = kv->kv.key[4];
597       reass->key.as_u64[5] = kv->kv.key[5];
598
599       int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
600       if (rv)
601         {
602           ip6_full_reass_free (rm, rt, reass);
603           reass = NULL;
604           // if another worker already created the context, work with that copy
605           if (-2 == rv)
606             goto again;
607         }
608     }
609   else
610     {
611       reass->key.as_u64[0] = ~0;
612       reass->key.as_u64[1] = ~0;
613       reass->key.as_u64[2] = ~0;
614       reass->key.as_u64[3] = ~0;
615       reass->key.as_u64[4] = ~0;
616       reass->key.as_u64[5] = ~0;
617     }
618
619   return reass;
620 }
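/*
 * Lookup/creation flow: an existing entry owned by another thread results in
 * a handoff; an expired entry is torn down and recreated; a fresh context is
 * inserted into the bihash with add-but-do-not-overwrite semantics (the 2
 * passed as is_add), so losing a race against another worker simply retries
 * the lookup via the "again" label. Atomic fragments skip the bihash
 * entirely (skip_bihash) and use a throwaway key of all ones.
 */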
621
622 always_inline ip6_full_reass_rc_t
623 ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
624                          ip6_full_reass_main_t * rm,
625                          ip6_full_reass_per_thread_t * rt,
626                          ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
627                          u32 * error0, bool is_custom_app)
628 {
629   *bi0 = reass->first_bi;
630   *error0 = IP6_ERROR_NONE;
631   ip6_frag_hdr_t *frag_hdr;
632   vlib_buffer_t *last_b = NULL;
633   u32 sub_chain_bi = reass->first_bi;
634   u32 total_length = 0;
635   u32 buf_cnt = 0;
636   u32 dropped_cnt = 0;
637   u32 *vec_drop_compress = NULL;
638   ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
639   do
640     {
641       u32 tmp_bi = sub_chain_bi;
642       vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi);
643       vnet_buffer_opaque_t *vnb = vnet_buffer (tmp);
644       if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
645           !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
646         {
647           rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
648           goto free_buffers_and_return;
649         }
650
651       u32 data_len = ip6_full_reass_buffer_get_data_len (tmp);
652       u32 trim_front = vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset +
653         sizeof (*frag_hdr) + ip6_full_reass_buffer_get_data_offset (tmp);
654       u32 trim_end =
655         vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len;
656       if (tmp_bi == reass->first_bi)
657         {
658           /* first buffer - keep ip6 header */
659           if (0 != ip6_full_reass_buffer_get_data_offset (tmp))
660             {
661               rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
662               goto free_buffers_and_return;
663             }
664           trim_front = 0;
665           trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len -
666             (vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset +
667              sizeof (*frag_hdr));
668           if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0))
669             {
670               rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
671               goto free_buffers_and_return;
672             }
673         }
674       u32 keep_data =
675         vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
676       while (1)
677         {
678           ++buf_cnt;
679           if (trim_front)
680             {
681               if (trim_front > tmp->current_length)
682                 {
683                   /* drop whole buffer */
684                   if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
685                     {
686                       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
687                       goto free_buffers_and_return;
688                     }
689                   trim_front -= tmp->current_length;
690                   vec_add1 (vec_drop_compress, tmp_bi);
691                   tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
692                   tmp_bi = tmp->next_buffer;
693                   tmp = vlib_get_buffer (vm, tmp_bi);
694                   continue;
695                 }
696               else
697                 {
698                   vlib_buffer_advance (tmp, trim_front);
699                   trim_front = 0;
700                 }
701             }
702           if (keep_data)
703             {
704               if (last_b)
705                 {
706                   last_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
707                   last_b->next_buffer = tmp_bi;
708                 }
709               last_b = tmp;
710               if (keep_data <= tmp->current_length)
711                 {
712                   tmp->current_length = keep_data;
713                   keep_data = 0;
714                 }
715               else
716                 {
717                   keep_data -= tmp->current_length;
718                   if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
719                     {
720                       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
721                       goto free_buffers_and_return;
722                     }
723                 }
724               total_length += tmp->current_length;
725             }
726           else
727             {
728               if (reass->first_bi == tmp_bi)
729                 {
730                   rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
731                   goto free_buffers_and_return;
732                 }
733               vec_add1 (vec_drop_compress, tmp_bi);
734               ++dropped_cnt;
735             }
736           if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
737             {
738               tmp_bi = tmp->next_buffer;
739               tmp = vlib_get_buffer (vm, tmp->next_buffer);
740             }
741           else
742             {
743               break;
744             }
745         }
746       sub_chain_bi =
747         vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip.
748         reass.next_range_bi;
749     }
750   while (~0 != sub_chain_bi);
751
752   if (!last_b)
753     {
754       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
755       goto free_buffers_and_return;
756     }
757   last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
758   vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
759   if (total_length < first_b->current_length)
760     {
761       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
762       goto free_buffers_and_return;
763     }
764   total_length -= first_b->current_length;
765   first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
766   first_b->total_length_not_including_first_buffer = total_length;
767   // drop fragment header
768   vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
769   ip6_header_t *ip = vlib_buffer_get_current (first_b);
770   u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
771   ip6_ext_hdr_chain_t hdr_chain;
772   ip6_ext_header_t *prev_hdr = 0;
773   int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
774                                  &hdr_chain);
775   if (res < 0 ||
776       (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
777     {
778       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
779       goto free_buffers_and_return;
780     }
781   frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
782   if (res > 0)
783     {
784       prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
785       prev_hdr->next_hdr = frag_hdr->next_hdr;
786     }
787   else
788     {
789       ip->protocol = frag_hdr->next_hdr;
790     }
791   if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
792     {
793       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
794       goto free_buffers_and_return;
795     }
796   memmove (frag_hdr, (u8 *) frag_hdr + sizeof (*frag_hdr),
797            first_b->current_length - ip6_frag_hdr_offset -
798            sizeof (ip6_frag_hdr_t));
799   first_b->current_length -= sizeof (*frag_hdr);
800   ip->payload_length =
801     clib_host_to_net_u16 (total_length + first_b->current_length -
802                           sizeof (*ip));
803   if (!vlib_buffer_chain_linearize (vm, first_b))
804     {
805       rv = IP6_FULL_REASS_RC_NO_BUF;
806       goto free_buffers_and_return;
807     }
808   first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
809   if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
810     {
811       ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
812                                 FINALIZE, ~0);
813 #if 0
814       // the following code hexdumps the reassembled buffer chain to stdout ...
815       do
816         {
817           u32 bi = reass->first_bi;
818           u8 *s = NULL;
819           while (~0 != bi)
820             {
821               vlib_buffer_t *b = vlib_get_buffer (vm, bi);
822               s = format (s, "%u: %U\n", bi, format_hexdump,
823                           vlib_buffer_get_current (b), b->current_length);
824               if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
825                 {
826                   bi = b->next_buffer;
827                 }
828               else
829                 {
830                   break;
831                 }
832             }
833           printf ("%.*s\n", vec_len (s), s);
834           fflush (stdout);
835           vec_free (s);
836         }
837       while (0);
838 #endif
839     }
840   if (!is_custom_app)
841     {
842       *next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
843     }
844   else
845     {
846       *next0 = reass->next_index;
847     }
848   vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
849   ip6_full_reass_free (rm, rt, reass);
850   reass = NULL;
851 free_buffers_and_return:
852   vlib_buffer_free (vm, vec_drop_compress, vec_len (vec_drop_compress));
853   vec_free (vec_drop_compress);
854   return rv;
855 }
856
857 always_inline void
858 ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
859                                       ip6_full_reass_t * reass,
860                                       u32 prev_range_bi, u32 new_next_bi)
861 {
862
863   vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi);
864   vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b);
865   if (~0 != prev_range_bi)
866     {
867       vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
868       vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
869       new_next_vnb->ip.reass.next_range_bi = prev_vnb->ip.reass.next_range_bi;
870       prev_vnb->ip.reass.next_range_bi = new_next_bi;
871     }
872   else
873     {
874       if (~0 != reass->first_bi)
875         {
876           new_next_vnb->ip.reass.next_range_bi = reass->first_bi;
877         }
878       reass->first_bi = new_next_bi;
879     }
880   reass->data_len += ip6_full_reass_buffer_get_data_len (new_next_b);
881 }
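/*
 * Ranges are kept as a singly linked list ordered by fragment offset, rooted
 * at reass->first_bi and linked through
 * vnet_buffer (b)->ip.reass.next_range_bi. Inserting with prev_range_bi ==
 * ~0 pushes the new range to the head; reass->data_len is bumped by the
 * usable data length of the new range either way.
 */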
882
883 always_inline ip6_full_reass_rc_t
884 ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
885                        ip6_full_reass_main_t *rm,
886                        ip6_full_reass_per_thread_t *rt,
887                        ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
888                        u32 *error0, ip6_frag_hdr_t *frag_hdr,
889                        bool is_custom_app, u32 *handoff_thread_idx,
890                        int skip_bihash)
891 {
892   int consumed = 0;
893   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
894   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
895   if (is_custom_app)
896     {
897       reass->next_index = fvnb->ip.reass.next_index;    // store next_index before it's overwritten
898       reass->error_next_index = fvnb->ip.reass.error_next_index;        // store error_next_index before it is overwritten
899     }
900
901   fvnb->ip.reass.ip6_frag_hdr_offset =
902     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
903   ip6_header_t *fip = vlib_buffer_get_current (fb);
904   if (fb->current_length < sizeof (*fip) ||
905       fvnb->ip.reass.ip6_frag_hdr_offset == 0 ||
906       fvnb->ip.reass.ip6_frag_hdr_offset >= fb->current_length)
907     {
908       return IP6_FULL_REASS_RC_INTERNAL_ERROR;
909     }
910
911   u32 fragment_first = fvnb->ip.reass.fragment_first =
912     ip6_frag_hdr_offset_bytes (frag_hdr);
913   u32 fragment_length =
914     vlib_buffer_length_in_chain (vm, fb) -
915     (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
916   if (0 == fragment_length)
917     {
918       return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
919     }
920   u32 fragment_last = fvnb->ip.reass.fragment_last =
921     fragment_first + fragment_length - 1;
922   int more_fragments = ip6_frag_hdr_more (frag_hdr);
923   u32 candidate_range_bi = reass->first_bi;
924   u32 prev_range_bi = ~0;
925   fvnb->ip.reass.range_first = fragment_first;
926   fvnb->ip.reass.range_last = fragment_last;
927   fvnb->ip.reass.next_range_bi = ~0;
928   if (!more_fragments)
929     {
930       reass->last_packet_octet = fragment_last;
931     }
932   if (~0 == reass->first_bi)
933     {
934       // starting a new reassembly
935       ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
936       reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length);
937       consumed = 1;
938       reass->fragments_n = 1;
939       goto check_if_done_maybe;
940     }
941   reass->min_fragment_length =
942     clib_min (clib_net_to_host_u16 (fip->payload_length),
943               fvnb->ip.reass.estimated_mtu);
944   while (~0 != candidate_range_bi)
945     {
946       vlib_buffer_t *candidate_b = vlib_get_buffer (vm, candidate_range_bi);
947       vnet_buffer_opaque_t *candidate_vnb = vnet_buffer (candidate_b);
948       if (fragment_first > candidate_vnb->ip.reass.range_last)
949         {
950           // this fragment starts after the candidate range
951           prev_range_bi = candidate_range_bi;
952           candidate_range_bi = candidate_vnb->ip.reass.next_range_bi;
953           if (candidate_vnb->ip.reass.range_last < fragment_last &&
954               ~0 == candidate_range_bi)
955             {
956               // special case - this fragment falls beyond all known ranges
957               ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
958                                                     *bi0);
959               consumed = 1;
960               break;
961             }
962           continue;
963         }
964       if (fragment_last < candidate_vnb->ip.reass.range_first)
965         {
966           // this fragment ends before the candidate range without any overlap
967           ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
968                                                 *bi0);
969           consumed = 1;
970         }
971       else if (fragment_first == candidate_vnb->ip.reass.range_first &&
972                fragment_last == candidate_vnb->ip.reass.range_last)
973         {
974           // duplicate fragment - ignore
975         }
976       else
977         {
978           // overlapping fragment - not allowed by RFC 8200
979           if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
980             {
981               ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr,
982                                         RANGE_OVERLAP, ~0);
983             }
984           return IP6_FULL_REASS_RC_OVERLAP;
985         }
986       break;
987     }
988   ++reass->fragments_n;
989 check_if_done_maybe:
990   if (consumed)
991     {
992       if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
993         {
994           ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr, RANGE_NEW,
995                                     ~0);
996         }
997     }
998   else if (skip_bihash)
999     {
1000       // if this reassembly is not in bihash, then the packet must have been
1001       // consumed
1002       return IP6_FULL_REASS_RC_INTERNAL_ERROR;
1003     }
1004   if (~0 != reass->last_packet_octet &&
1005       reass->data_len == reass->last_packet_octet + 1)
1006     {
1007       *handoff_thread_idx = reass->sendout_thread_index;
1008       int handoff =
1009         reass->memory_owner_thread_index != reass->sendout_thread_index;
1010       ip6_full_reass_rc_t rc =
1011         ip6_full_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
1012                                  is_custom_app);
1013       if (IP6_FULL_REASS_RC_OK == rc && handoff)
1014         {
1015           return IP6_FULL_REASS_RC_HANDOFF;
1016         }
1017       return rc;
1018     }
1019   else
1020     {
1021       if (skip_bihash)
1022         {
1023           // if this reassembly is not in bihash, it should've been an atomic
1024           // fragment and thus finalized
1025           return IP6_FULL_REASS_RC_INTERNAL_ERROR;
1026         }
1027       if (consumed)
1028         {
1029           *bi0 = ~0;
1030           if (reass->fragments_n > rm->max_reass_len)
1031             {
1032               return IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS;
1033             }
1034         }
1035       else
1036         {
1037           *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1038           *error0 = IP6_ERROR_REASS_DUPLICATE_FRAGMENT;
1039         }
1040     }
1041   return IP6_FULL_REASS_RC_OK;
1042 }
1043
1044 always_inline bool
1045 ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
1046                                            vlib_buffer_t *b,
1047                                            ip6_ext_hdr_chain_t *hc)
1048 {
1049   int nh = hc->eh[hc->length - 1].protocol;
1050   /* if the chain still ends with an extension header, the upper-layer header is missing */
1051   if (ip6_ext_hdr (nh))
1052     {
1053       icmp6_error_set_vnet_buffer (
1054         b, ICMP6_parameter_problem,
1055         ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
1056       b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
1057       return false;
1058     }
1059   return true;
1060 }
1061
1062 always_inline bool
1063 ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
1064                                            vlib_buffer_t * b,
1065                                            ip6_frag_hdr_t * frag_hdr)
1066 {
1067   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1068   ip6_header_t *ip = vlib_buffer_get_current (b);
1069   int more_fragments = ip6_frag_hdr_more (frag_hdr);
1070   u32 fragment_length =
1071     vlib_buffer_length_in_chain (vm, b) -
1072     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
1073   if (more_fragments && 0 != fragment_length % 8)
1074     {
1075       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
1076                                    ICMP6_parameter_problem_erroneous_header_field,
1077                                    (u8 *) & ip->payload_length - (u8 *) ip);
1078       return false;
1079     }
1080   return true;
1081 }
1082
1083 always_inline bool
1084 ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
1085                                           vlib_buffer_t * b,
1086                                           ip6_frag_hdr_t * frag_hdr)
1087 {
1088   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1089   u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
1090   u32 fragment_length =
1091     vlib_buffer_length_in_chain (vm, b) -
1092     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
1093   if (fragment_first + fragment_length > 65535)
1094     {
1095       ip6_header_t *ip0 = vlib_buffer_get_current (b);
1096       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
1097                                    ICMP6_parameter_problem_erroneous_header_field,
1098                                    (u8 *) & frag_hdr->fragment_offset_and_more
1099                                    - (u8 *) ip0);
1100       return false;
1101     }
1102   return true;
1103 }
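/*
 * The three checks above implement the first-fragment and size sanity rules
 * from RFC 8200 section 4.5: a first fragment whose header chain does not
 * end in an upper-layer header, a non-last fragment whose length is not a
 * multiple of 8, or a fragment that would push the reassembled packet past
 * 65535 bytes all trigger an ICMP parameter problem instead of being
 * reassembled.
 */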
1104
1105 always_inline uword
1106 ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1107                             vlib_frame_t *frame, bool is_feature,
1108                             bool is_custom_app, bool is_local)
1109 {
1110   u32 *from = vlib_frame_vector_args (frame);
1111   u32 n_left_from, n_left_to_next, *to_next, next_index;
1112   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1113   ip6_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
1114   clib_spinlock_lock (&rt->lock);
1115
1116   n_left_from = frame->n_vectors;
1117   next_index = node->cached_next_index;
1118   while (n_left_from > 0)
1119     {
1120       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1121
1122       while (n_left_from > 0 && n_left_to_next > 0)
1123         {
1124           u32 bi0;
1125           vlib_buffer_t *b0;
1126           u32 next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1127           u32 error0 = IP6_ERROR_NONE;
1128           u32 icmp_bi = ~0;
1129
1130           bi0 = from[0];
1131           b0 = vlib_get_buffer (vm, bi0);
1132
1133           ip6_header_t *ip0 = vlib_buffer_get_current (b0);
1134           ip6_frag_hdr_t *frag_hdr;
1135           ip6_ext_hdr_chain_t hdr_chain;
1136           int res = ip6_ext_header_walk (
1137             b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
1138           if (res < 0 ||
1139               hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
1140             {
1141               // no fragment header found - not a fragment (mangled packet)
1142               next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1143               ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
1144                                         ~0);
1145               goto skip_reass;
1146             }
1147           if (is_local && !rm->is_local_reass_enabled)
1148             {
1149               next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1150               goto skip_reass;
1151             }
1152           frag_hdr =
1153             ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
1154           vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
1155             hdr_chain.eh[res].offset;
1156
1157           if (0 == ip6_frag_hdr_offset (frag_hdr))
1158             {
1159               // first fragment - verify upper-layer is present
1160               if (!ip6_full_reass_verify_upper_layer_present (node, b0,
1161                                                               &hdr_chain))
1162                 {
1163                   next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1164                   goto skip_reass;
1165                 }
1166             }
1167           if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
1168               !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
1169             {
1170               next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1171               goto skip_reass;
1172             }
1173
1174           int skip_bihash = 0;
1175           ip6_full_reass_kv_t kv;
1176           u8 do_handoff = 0;
1177
1178           if (0 == ip6_frag_hdr_offset (frag_hdr) &&
1179               !ip6_frag_hdr_more (frag_hdr))
1180             {
1181               // this is an atomic fragment (offset 0, no more fragments) - process it separately
1182               skip_bihash = 1;
1183             }
1184           else
1185             {
1186               kv.k.as_u64[0] = ip0->src_address.as_u64[0];
1187               kv.k.as_u64[1] = ip0->src_address.as_u64[1];
1188               kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
1189               kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
1190               kv.k.as_u64[4] =
1191                 ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
1192                                 vnet_buffer (b0)->sw_if_index[VLIB_RX]))
1193                   << 32 |
1194                 (u64) frag_hdr->identification;
1195               kv.k.as_u64[5] = ip0->protocol;
1196             }
1197
1198           ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
1199             vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
1200
1201           if (reass)
1202             {
1203               const u32 fragment_first = ip6_frag_hdr_offset (frag_hdr);
1204               if (0 == fragment_first)
1205                 {
1206                   reass->sendout_thread_index = vm->thread_index;
1207                 }
1208             }
1209           if (PREDICT_FALSE (do_handoff))
1210             {
1211               next0 = IP6_FULL_REASSEMBLY_NEXT_HANDOFF;
1212               vnet_buffer (b0)->ip.reass.owner_thread_index =
1213                 kv.v.memory_owner_thread_index;
1214             }
1215           else if (reass)
1216             {
1217               u32 handoff_thread_idx;
1218               u32 counter = ~0;
1219               switch (ip6_full_reass_update (
1220                 vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
1221                 is_custom_app, &handoff_thread_idx, skip_bihash))
1222                 {
1223                 case IP6_FULL_REASS_RC_OK:
1224                   /* nothing to do here */
1225                   break;
1226                 case IP6_FULL_REASS_RC_HANDOFF:
1227                   next0 = IP6_FULL_REASSEMBLY_NEXT_HANDOFF;
1228                   b0 = vlib_get_buffer (vm, bi0);
1229                   vnet_buffer (b0)->ip.reass.owner_thread_index =
1230                     handoff_thread_idx;
1231                   break;
1232                 case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
1233                   counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
1234                   break;
1235                 case IP6_FULL_REASS_RC_NO_BUF:
1236                   counter = IP6_ERROR_REASS_NO_BUF;
1237                   break;
1238                 case IP6_FULL_REASS_RC_INTERNAL_ERROR:
1239                   counter = IP6_ERROR_REASS_INTERNAL_ERROR;
1240                   break;
1241                 case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
1242                   counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
1243                   break;
1244                 case IP6_FULL_REASS_RC_OVERLAP:
1245                   counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
1246                   break;
1247                 }
1248               if (~0 != counter)
1249                 {
1250                   vlib_node_increment_counter (vm, node->node_index, counter,
1251                                                1);
1252                   ip6_full_reass_drop_all (vm, node, reass, bi0);
1253                   ip6_full_reass_free (rm, rt, reass);
1254                   goto next_packet;
1255                   break;
1256                 }
1257             }
1258           else
1259             {
1260               if (is_feature)
1261                 {
1262                   next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1263                 }
1264               else
1265                 {
1266                   vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
1267                   next0 = fvnb->ip.reass.error_next_index;
1268                 }
1269               error0 = IP6_ERROR_REASS_LIMIT_REACHED;
1270             }
1271
1272           if (~0 != bi0)
1273             {
1274             skip_reass:
1275               to_next[0] = bi0;
1276               to_next += 1;
1277               n_left_to_next -= 1;
1278
1279               /* bi0 might have been updated by reass_finalize, reload */
1280               b0 = vlib_get_buffer (vm, bi0);
1281               if (IP6_ERROR_NONE != error0)
1282                 {
1283                   b0->error = node->errors[error0];
1284                 }
1285
1286               if (next0 == IP6_FULL_REASSEMBLY_NEXT_HANDOFF)
1287                 {
1288                   if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1289                     {
1290                       ip6_full_reass_add_trace (
1291                         vm, node, NULL, bi0, frag_hdr, HANDOFF,
1292                         vnet_buffer (b0)->ip.reass.owner_thread_index);
1293                     }
1294                 }
1295               else if (is_feature && IP6_ERROR_NONE == error0)
1296                 {
1297                   vnet_feature_next (&next0, b0);
1298                 }
1299               vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1300                                                n_left_to_next, bi0, next0);
1301             }
1302
1303           if (~0 != icmp_bi)
1304             {
1305               next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1306               to_next[0] = icmp_bi;
1307               to_next += 1;
1308               n_left_to_next -= 1;
1309               vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1310                                                n_left_to_next, icmp_bi,
1311                                                next0);
1312             }
1313         next_packet:
1314           from += 1;
1315           n_left_from -= 1;
1316         }
1317
1318       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1319     }
1320
1321   clib_spinlock_unlock (&rt->lock);
1322   return frame->n_vectors;
1323 }
1324
1325 static char *ip6_full_reassembly_error_strings[] = {
1326 #define _(sym, string) string,
1327   foreach_ip6_error
1328 #undef _
1329 };
1330
1331 VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
1332                                     vlib_node_runtime_t * node,
1333                                     vlib_frame_t * frame)
1334 {
1335   return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
1336                                      false /* is_custom_app */,
1337                                      false /* is_local */);
1338 }
1339
1340 VLIB_REGISTER_NODE (ip6_full_reass_node) = {
1341     .name = "ip6-full-reassembly",
1342     .vector_size = sizeof (u32),
1343     .format_trace = format_ip6_full_reass_trace,
1344     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1345     .error_strings = ip6_full_reassembly_error_strings,
1346     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1347     .next_nodes =
1348         {
1349                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1350                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1351                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1352                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reassembly-handoff",
1353         },
1354 };
1355
1356 VLIB_NODE_FN (ip6_local_full_reass_node)
1357 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1358 {
1359   return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
1360                                      false /* is_custom_app */,
1361                                      true /* is_local */);
1362 }
1363
1364 VLIB_REGISTER_NODE (ip6_local_full_reass_node) = {
1365     .name = "ip6-local-full-reassembly",
1366     .vector_size = sizeof (u32),
1367     .format_trace = format_ip6_full_reass_trace,
1368     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1369     .error_strings = ip6_full_reassembly_error_strings,
1370     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1371     .next_nodes =
1372         {
1373                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1374                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1375                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1376                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff",
1377         },
1378 };
1379
1380 VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm,
1381                                             vlib_node_runtime_t * node,
1382                                             vlib_frame_t * frame)
1383 {
1384   return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */,
1385                                      false /* is_custom_app */,
1386                                      false /* is_local */);
1387 }
1388
1389 VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
1390     .name = "ip6-full-reassembly-feature",
1391     .vector_size = sizeof (u32),
1392     .format_trace = format_ip6_full_reass_trace,
1393     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1394     .error_strings = ip6_full_reassembly_error_strings,
1395     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1396     .next_nodes =
1397         {
1398                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1399                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1400                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1401                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-feature-hoff",
1402         },
1403 };
1404
1405 VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
1406     .arc_name = "ip6-unicast",
1407     .node_name = "ip6-full-reassembly-feature",
1408     .runs_before = VNET_FEATURES ("ip6-lookup",
1409                                   "ipsec6-input-feature"),
1410     .runs_after = 0,
1411 };
1412
1413 VLIB_NODE_FN (ip6_full_reass_node_custom)
1414 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1415 {
1416   return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
1417                                      true /* is_custom_app */,
1418                                      false /* is_local */);
1419 }
1420
1421 VLIB_REGISTER_NODE (ip6_full_reass_node_custom) = {
1422     .name = "ip6-full-reassembly-custom",
1423     .vector_size = sizeof (u32),
1424     .format_trace = format_ip6_full_reass_trace,
1425     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1426     .error_strings = ip6_full_reassembly_error_strings,
1427     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1428     .next_nodes =
1429         {
1430                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1431                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1432                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1433                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-custom-hoff",
1434         },
1435 };
1436
1437 #ifndef CLIB_MARCH_VARIANT
1438 static u32
1439 ip6_full_reass_get_nbuckets ()
1440 {
1441   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1442   u32 nbuckets;
1443   u8 i;
1444
1445   /* need more mem with more workers */
1446   nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
1447                     IP6_FULL_REASS_HT_LOAD_FACTOR);
1448
1449   for (i = 0; i < 31; i++)
1450     if ((1 << i) >= nbuckets)
1451       break;
1452   nbuckets = 1 << i;
1453
1454   return nbuckets;
1455 }
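/*
 * Example sizing: with the default 1024 reassemblies and one worker thread,
 * nbuckets = 1024 * 2 / 0.75 = 2730, rounded up to the next power of two,
 * i.e. 4096 buckets.
 */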
1456 #endif /* CLIB_MARCH_VARIANT */
1457
1458 typedef enum
1459 {
1460   IP6_EVENT_CONFIG_CHANGED = 1,
1461 } ip6_full_reass_event_t;
1462
1463 #ifndef CLIB_MARCH_VARIANT
1464 typedef struct
1465 {
1466   int failure;
1467   clib_bihash_48_8_t *new_hash;
1468 } ip6_rehash_cb_ctx;
1469
1470 static int
1471 ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx)
1472 {
1473   ip6_rehash_cb_ctx *ctx = _ctx;
1474   if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1))
1475     {
1476       ctx->failure = 1;
1477     }
1478   return (BIHASH_WALK_CONTINUE);
1479 }
1480
1481 static void
1482 ip6_full_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1483                            u32 max_reassembly_length,
1484                            u32 expire_walk_interval_ms)
1485 {
1486   ip6_full_reass_main.timeout_ms = timeout_ms;
1487   ip6_full_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1488   ip6_full_reass_main.max_reass_n = max_reassemblies;
1489   ip6_full_reass_main.max_reass_len = max_reassembly_length;
1490   ip6_full_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
1491 }
1492
1493 vnet_api_error_t
1494 ip6_full_reass_set (u32 timeout_ms, u32 max_reassemblies,
1495                     u32 max_reassembly_length, u32 expire_walk_interval_ms)
1496 {
1497   u32 old_nbuckets = ip6_full_reass_get_nbuckets ();
1498   ip6_full_reass_set_params (timeout_ms, max_reassemblies,
1499                              max_reassembly_length, expire_walk_interval_ms);
1500   vlib_process_signal_event (ip6_full_reass_main.vlib_main,
1501                              ip6_full_reass_main.ip6_full_reass_expire_node_idx,
1502                              IP6_EVENT_CONFIG_CHANGED, 0);
1503   u32 new_nbuckets = ip6_full_reass_get_nbuckets ();
1504   if (ip6_full_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
1505     {
1506       clib_bihash_48_8_t new_hash;
1507       clib_memset (&new_hash, 0, sizeof (new_hash));
1508       ip6_rehash_cb_ctx ctx;
1509       ctx.failure = 0;
1510       ctx.new_hash = &new_hash;
1511       clib_bihash_init_48_8 (&new_hash, "ip6-full-reass", new_nbuckets,
1512                              new_nbuckets * 1024);
1513       clib_bihash_foreach_key_value_pair_48_8 (&ip6_full_reass_main.hash,
1514                                                ip6_rehash_cb, &ctx);
1515       if (ctx.failure)
1516         {
1517           clib_bihash_free_48_8 (&new_hash);
1518           return -1;
1519         }
1520       else
1521         {
1522           clib_bihash_free_48_8 (&ip6_full_reass_main.hash);
1523           clib_memcpy_fast (&ip6_full_reass_main.hash, &new_hash,
1524                             sizeof (ip6_full_reass_main.hash));
1525           clib_bihash_copied (&ip6_full_reass_main.hash, &new_hash);
1526         }
1527     }
1528   return 0;
1529 }
1530
1531 vnet_api_error_t
1532 ip6_full_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1533                     u32 * max_reassembly_length,
1534                     u32 * expire_walk_interval_ms)
1535 {
1536   *timeout_ms = ip6_full_reass_main.timeout_ms;
1537   *max_reassemblies = ip6_full_reass_main.max_reass_n;
1538   *max_reassembly_length = ip6_full_reass_main.max_reass_len;
1539   *expire_walk_interval_ms = ip6_full_reass_main.expire_walk_interval_ms;
1540   return 0;
1541 }
1542
1543 static clib_error_t *
1544 ip6_full_reass_init_function (vlib_main_t * vm)
1545 {
1546   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1547   clib_error_t *error = 0;
1548   u32 nbuckets;
1549   vlib_node_t *node;
1550
1551   rm->vlib_main = vm;
1552
1553   vec_validate (rm->per_thread_data, vlib_num_workers ());
1554   ip6_full_reass_per_thread_t *rt;
1555   vec_foreach (rt, rm->per_thread_data)
1556   {
1557     clib_spinlock_init (&rt->lock);
1558     pool_alloc (rt->pool, rm->max_reass_n);
1559   }
1560
1561   node = vlib_get_node_by_name (vm, (u8 *) "ip6-full-reassembly-expire-walk");
1562   ASSERT (node);
1563   rm->ip6_full_reass_expire_node_idx = node->index;
1564
1565   ip6_full_reass_set_params (IP6_FULL_REASS_TIMEOUT_DEFAULT_MS,
1566                              IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT,
1567                              IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
1568                              IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
1569
1570   nbuckets = ip6_full_reass_get_nbuckets ();
1571   clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets,
1572                          nbuckets * 1024);
1573
1574   node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
1575   ASSERT (node);
1576   rm->ip6_icmp_error_idx = node->index;
1577
1578   if ((error = vlib_call_init_function (vm, ip_main_init)))
1579     return error;
1580   ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
1581                          ip6_local_full_reass_node.index);
1582   rm->is_local_reass_enabled = 1;
1583
1584   rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0);
1585   rm->fq_local_index =
1586     vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0);
1587   rm->fq_feature_index =
1588     vlib_frame_queue_main_init (ip6_full_reass_node_feature.index, 0);
1589   rm->fq_custom_index =
1590     vlib_frame_queue_main_init (ip6_full_reass_node_custom.index, 0);
1591
1592   rm->feature_use_refcount_per_intf = NULL;
1593   return error;
1594 }
1595
1596 VLIB_INIT_FUNCTION (ip6_full_reass_init_function);
1597 #endif /* CLIB_MARCH_VARIANT */
1598
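/* Process node: wakes up every expire_walk_interval_ms (or when signalled on
 * a config change), frees reassemblies idle for longer than the timeout and
 * forwards any buffers produced on timeout to the ip6-icmp-error node. */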
1599 static uword
1600 ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
1601                              CLIB_UNUSED (vlib_frame_t *f))
1602 {
1603   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1604   uword event_type, *event_data = 0;
1605
1606   while (true)
1607     {
1608       vlib_process_wait_for_event_or_clock (vm,
1609                                             (f64) rm->expire_walk_interval_ms
1610                                             / (f64) MSEC_PER_SEC);
1611       event_type = vlib_process_get_events (vm, &event_data);
1612
1613       switch (event_type)
1614         {
1615         case ~0:
1616           /* no events => timeout */
1617           /* fallthrough */
1618         case IP6_EVENT_CONFIG_CHANGED:
1619           /* nothing to do here */
1620           break;
1621         default:
1622           clib_warning ("BUG: event type 0x%wx", event_type);
1623           break;
1624         }
1625       f64 now = vlib_time_now (vm);
1626
1627       ip6_full_reass_t *reass;
1628       int *pool_indexes_to_free = NULL;
1629
1630       uword thread_index = 0;
1631       int index;
1632       const uword nthreads = vlib_num_workers () + 1;
1633       u32 *vec_icmp_bi = NULL;
1634       for (thread_index = 0; thread_index < nthreads; ++thread_index)
1635         {
1636           ip6_full_reass_per_thread_t *rt =
1637             &rm->per_thread_data[thread_index];
1638           clib_spinlock_lock (&rt->lock);
1639
1640           vec_reset_length (pool_indexes_to_free);
1641           pool_foreach_index (index, rt->pool) {
1642             reass = pool_elt_at_index (rt->pool, index);
1643             if (now > reass->last_heard + rm->timeout)
1644               {
1645                 vec_add1 (pool_indexes_to_free, index);
1646               }
1647           }
1648           int *i;
1649           vec_foreach (i, pool_indexes_to_free)
1650           {
1651             ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1652             u32 icmp_bi = ~0;
1653             ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi);
1654             if (~0 != icmp_bi)
1655               vec_add1 (vec_icmp_bi, icmp_bi);
1656
1657             ip6_full_reass_free (rm, rt, reass);
1658           }
1659
1660           clib_spinlock_unlock (&rt->lock);
1661         }
1662
1663       while (vec_len (vec_icmp_bi) > 0)
1664         {
1665           vlib_frame_t *f =
1666             vlib_get_frame_to_node (vm, rm->ip6_icmp_error_idx);
1667           u32 *to_next = vlib_frame_vector_args (f);
1668           u32 n_left_to_next = VLIB_FRAME_SIZE - f->n_vectors;
1669           int trace_frame = 0;
1670           while (vec_len (vec_icmp_bi) > 0 && n_left_to_next > 0)
1671             {
1672               u32 bi = vec_pop (vec_icmp_bi);
1673               vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1674               if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
1675                 trace_frame = 1;
1676               b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
1677               to_next[0] = bi;
1678               ++f->n_vectors;
1679               to_next += 1;
1680               n_left_to_next -= 1;
1681             }
1682           f->frame_flags |= (trace_frame * VLIB_FRAME_TRACE);
1683           vlib_put_frame_to_node (vm, rm->ip6_icmp_error_idx, f);
1684         }
1685
1686       vec_free (pool_indexes_to_free);
1687       vec_free (vec_icmp_bi);
1688       if (event_data)
1689         {
1690           vec_set_len (event_data, 0);
1691         }
1692     }
1693
1694   return 0;
1695 }
1696
1697 VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
1698     .function = ip6_full_reass_walk_expired,
1699     .format_trace = format_ip6_full_reass_trace,
1700     .type = VLIB_NODE_TYPE_PROCESS,
1701     .name = "ip6-full-reassembly-expire-walk",
1702
1703     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1704     .error_strings = ip6_full_reassembly_error_strings,
1705
1706 };
1707
1708 static u8 *
1709 format_ip6_full_reass_key (u8 * s, va_list * args)
1710 {
1711   ip6_full_reass_key_t *key = va_arg (*args, ip6_full_reass_key_t *);
1712   s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
1713               key->xx_id, format_ip6_address, &key->src, format_ip6_address,
1714               &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
1715   return s;
1716 }
1717
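/* Format a reassembly context followed by the byte ranges held by each
 * fragment in its buffer chain. */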
1718 static u8 *
1719 format_ip6_full_reass (u8 * s, va_list * args)
1720 {
1721   vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1722   ip6_full_reass_t *reass = va_arg (*args, ip6_full_reass_t *);
1723
1724   s = format (s, "ID: %lu, key: %U\n  first_bi: %u, data_len: %u, "
1725               "last_packet_octet: %u, trace_op_counter: %u\n",
1726               reass->id, format_ip6_full_reass_key, &reass->key,
1727               reass->first_bi, reass->data_len, reass->last_packet_octet,
1728               reass->trace_op_counter);
1729   u32 bi = reass->first_bi;
1730   u32 counter = 0;
1731   while (~0 != bi)
1732     {
1733       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1734       vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1735       s = format (s, "  #%03u: range: [%u, %u], bi: %u, off: %d, len: %u, "
1736                   "fragment[%u, %u]\n",
1737                   counter, vnb->ip.reass.range_first,
1738                   vnb->ip.reass.range_last, bi,
1739                   ip6_full_reass_buffer_get_data_offset (b),
1740                   ip6_full_reass_buffer_get_data_len (b),
1741                   vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last);
1742       if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
1743         {
1744           bi = b->next_buffer;
1745         }
1746       else
1747         {
1748           bi = ~0;
1749         }
1750     }
1751   return s;
1752 }
1753
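/* CLI handler for "show ip6-full-reassembly [details]": prints per-thread
 * reassembly counts and, with "details", each active reassembly context. */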
1754 static clib_error_t *
1755 show_ip6_full_reass (vlib_main_t * vm, unformat_input_t * input,
1756                      CLIB_UNUSED (vlib_cli_command_t * lmd))
1757 {
1758   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1759
1760   vlib_cli_output (vm, "---------------------");
1761   vlib_cli_output (vm, "IP6 reassembly status");
1762   vlib_cli_output (vm, "---------------------");
1763   bool details = false;
1764   if (unformat (input, "details"))
1765     {
1766       details = true;
1767     }
1768
1769   u32 sum_reass_n = 0;
1770   u64 sum_buffers_n = 0;
1771   ip6_full_reass_t *reass;
1772   uword thread_index;
1773   const uword nthreads = vlib_num_workers () + 1;
1774   for (thread_index = 0; thread_index < nthreads; ++thread_index)
1775     {
1776       ip6_full_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1777       clib_spinlock_lock (&rt->lock);
1778       if (details)
1779         {
1780           pool_foreach (reass, rt->pool) {
1781             vlib_cli_output (vm, "%U", format_ip6_full_reass, vm, reass);
1782           }
1783         }
1784       sum_reass_n += rt->reass_n;
1785       clib_spinlock_unlock (&rt->lock);
1786     }
1787   vlib_cli_output (vm, "---------------------");
1788   vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n",
1789                    (long unsigned) sum_reass_n);
1790   vlib_cli_output (vm,
1791                    "Maximum configured concurrent full IP6 reassemblies per worker-thread: %lu\n",
1792                    (long unsigned) rm->max_reass_n);
1793   vlib_cli_output (vm,
1794                    "Maximum configured number of fragments "
1795                    "per full IP6 reassembly: %lu\n",
1796                    (long unsigned) rm->max_reass_len);
1797   vlib_cli_output (vm,
1798                    "Maximum configured full IP6 reassembly timeout: %lums\n",
1799                    (long unsigned) rm->timeout_ms);
1800   vlib_cli_output (vm,
1801                    "Maximum configured full IP6 reassembly expire walk interval: %lums\n",
1802                    (long unsigned) rm->expire_walk_interval_ms);
1803   vlib_cli_output (vm, "Buffers in use: %lu\n",
1804                    (long unsigned) sum_buffers_n);
1805   return 0;
1806 }
1807
1808 VLIB_CLI_COMMAND (show_ip6_full_reassembly_cmd, static) = {
1809     .path = "show ip6-full-reassembly",
1810     .short_help = "show ip6-full-reassembly [details]",
1811     .function = show_ip6_full_reass,
1812 };
1813
1814 #ifndef CLIB_MARCH_VARIANT
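/* Enable or disable the ip6-full-reassembly-feature on the ip6-unicast arc
 * for the given interface. */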
1815 vnet_api_error_t
1816 ip6_full_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1817 {
1818   return vnet_feature_enable_disable ("ip6-unicast",
1819                                       "ip6-full-reassembly-feature",
1820                                       sw_if_index, enable_disable, 0, 0);
1821 }
1822 #endif /* CLIB_MARCH_VARIANT */
1823
1824 #define foreach_ip6_full_reassembly_handoff_error                       \
1825 _(CONGESTION_DROP, "congestion drop")
1826
1827
1828 typedef enum
1829 {
1830 #define _(sym,str) IP6_FULL_REASSEMBLY_HANDOFF_ERROR_##sym,
1831   foreach_ip6_full_reassembly_handoff_error
1832 #undef _
1833     IP6_FULL_REASSEMBLY_HANDOFF_N_ERROR,
1834 } ip6_full_reassembly_handoff_error_t;
1835
1836 static char *ip6_full_reassembly_handoff_error_strings[] = {
1837 #define _(sym,string) string,
1838   foreach_ip6_full_reassembly_handoff_error
1839 #undef _
1840 };
1841
1842 typedef struct
1843 {
1844   u32 next_worker_index;
1845 } ip6_full_reassembly_handoff_trace_t;
1846
1847 static u8 *
1848 format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args)
1849 {
1850   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1851   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1852   ip6_full_reassembly_handoff_trace_t *t =
1853     va_arg (*args, ip6_full_reassembly_handoff_trace_t *);
1854
1855   s =
1856     format (s, "ip6-full-reassembly-handoff: next-worker %d",
1857             t->next_worker_index);
1858
1859   return s;
1860 }
1861
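/* Common handoff code: select the frame queue matching the node flavour
 * (normal, local, feature or custom), read the owner thread index stamped in
 * the buffer metadata, enqueue each packet to that thread and count any
 * congestion drops. */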
1862 always_inline uword
1863 ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1864                                     vlib_frame_t *frame,
1865                                     ip6_full_reass_node_type_t type,
1866                                     bool is_local)
1867 {
1868   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1869
1870   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1871   u32 n_enq, n_left_from, *from;
1872   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1873   u32 fq_index;
1874
1875   from = vlib_frame_vector_args (frame);
1876   n_left_from = frame->n_vectors;
1877   vlib_get_buffers (vm, from, bufs, n_left_from);
1878
1879   b = bufs;
1880   ti = thread_indices;
1881
1882   switch (type)
1883     {
1884     case NORMAL:
1885       if (is_local)
1886         {
1887           fq_index = rm->fq_local_index;
1888         }
1889       else
1890         {
1891           fq_index = rm->fq_index;
1892         }
1893       break;
1894     case FEATURE:
1895       fq_index = rm->fq_feature_index;
1896       break;
1897     case CUSTOM:
1898       fq_index = rm->fq_custom_index;
1899       break;
1900     default:
1901       clib_warning ("Unexpected `type' (%d)!", type);
1902       ASSERT (0);
1903     }
1904   while (n_left_from > 0)
1905     {
1906       ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1907
1908       if (PREDICT_FALSE
1909           ((node->flags & VLIB_NODE_FLAG_TRACE)
1910            && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1911         {
1912           ip6_full_reassembly_handoff_trace_t *t =
1913             vlib_add_trace (vm, node, b[0], sizeof (*t));
1914           t->next_worker_index = ti[0];
1915         }
1916
1917       n_left_from -= 1;
1918       ti += 1;
1919       b += 1;
1920     }
1921   n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
1922                                          thread_indices, frame->n_vectors, 1);
1923
1924   if (n_enq < frame->n_vectors)
1925     vlib_node_increment_counter (vm, node->node_index,
1926                                  IP6_FULL_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
1927                                  frame->n_vectors - n_enq);
1928   return frame->n_vectors;
1929 }
1930
1931 VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm,
1932                                                  vlib_node_runtime_t * node,
1933                                                  vlib_frame_t * frame)
1934 {
1935   return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
1936                                              false /* is_local */);
1937 }
1938
1939 VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
1940   .name = "ip6-full-reassembly-handoff",
1941   .vector_size = sizeof (u32),
1942   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1943   .error_strings = ip6_full_reassembly_handoff_error_strings,
1944   .format_trace = format_ip6_full_reassembly_handoff_trace,
1945
1946   .n_next_nodes = 1,
1947
1948   .next_nodes = {
1949     [0] = "error-drop",
1950   },
1951 };
1952
1953 VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node)
1954 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1955 {
1956   return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
1957                                              true /* is_local */);
1958 }
1959
1960 VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = {
1961   .name = "ip6-local-full-reassembly-handoff",
1962   .vector_size = sizeof (u32),
1963   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1964   .error_strings = ip6_full_reassembly_handoff_error_strings,
1965   .format_trace = format_ip6_full_reassembly_handoff_trace,
1966
1967   .n_next_nodes = 1,
1968
1969   .next_nodes = {
1970     [0] = "error-drop",
1971   },
1972 };
1973
1974 VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm,
1975                                vlib_node_runtime_t * node, vlib_frame_t * frame)
1976 {
1977   return ip6_full_reassembly_handoff_inline (vm, node, frame, FEATURE,
1978                                              false /* is_local */);
1979 }
1980
1981 VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
1982   .name = "ip6-full-reass-feature-hoff",
1983   .vector_size = sizeof (u32),
1984   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1985   .error_strings = ip6_full_reassembly_handoff_error_strings,
1986   .format_trace = format_ip6_full_reassembly_handoff_trace,
1987
1988   .n_next_nodes = 1,
1989
1990   .next_nodes = {
1991     [0] = "error-drop",
1992   },
1993 };
1994
1995 VLIB_NODE_FN (ip6_full_reassembly_custom_handoff_node)
1996 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1997 {
1998   return ip6_full_reassembly_handoff_inline (vm, node, frame, CUSTOM,
1999                                              false /* is_local */);
2000 }
2001
2002 VLIB_REGISTER_NODE (ip6_full_reassembly_custom_handoff_node) = {
2003   .name = "ip6-full-reass-custom-hoff",
2004   .vector_size = sizeof (u32),
2005   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
2006   .error_strings = ip6_full_reassembly_handoff_error_strings,
2007   .format_trace = format_ip6_full_reassembly_handoff_trace,
2008
2009   .n_next_nodes = 1,
2010
2011   .next_nodes = {
2012     [0] = "error-drop",
2013   },
2014 };
2015
2016 #ifndef CLIB_MARCH_VARIANT
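/* Reference-counted wrapper around the ip6-full-reassembly-feature on
 * ip6-unicast: the vnet feature is enabled only for the first user of an
 * interface and disabled again when the last user goes away. */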
2017 int
2018 ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
2019 {
2020   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
2021   vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
2022   if (is_enable)
2023     {
2024       if (!rm->feature_use_refcount_per_intf[sw_if_index])
2025         {
2026           ++rm->feature_use_refcount_per_intf[sw_if_index];
2027           return vnet_feature_enable_disable ("ip6-unicast",
2028                                               "ip6-full-reassembly-feature",
2029                                               sw_if_index, 1, 0, 0);
2030         }
2031       ++rm->feature_use_refcount_per_intf[sw_if_index];
2032     }
2033   else
2034     {
2035       --rm->feature_use_refcount_per_intf[sw_if_index];
2036       if (!rm->feature_use_refcount_per_intf[sw_if_index])
2037         return vnet_feature_enable_disable ("ip6-unicast",
2038                                             "ip6-full-reassembly-feature",
2039                                             sw_if_index, 0, 0, 0);
2040     }
2041   return -1;
2042 }
2043
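/* Enable or disable reassembly of fragments destined to the box itself by
 * (un)registering the IPv6 fragmentation header handler with ip6-local. */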
2044 void
2045 ip6_local_full_reass_enable_disable (int enable)
2046 {
2047   if (enable)
2048     {
2049       if (!ip6_full_reass_main.is_local_reass_enabled)
2050         {
2051           ip6_full_reass_main.is_local_reass_enabled = 1;
2052           ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
2053                                  ip6_local_full_reass_node.index);
2054         }
2055     }
2056   else
2057     {
2058       if (ip6_full_reass_main.is_local_reass_enabled)
2059         {
2060           ip6_full_reass_main.is_local_reass_enabled = 0;
2061           ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION);
2062         }
2063     }
2064 }
2065
2066 int
2067 ip6_local_full_reass_enabled ()
2068 {
2069   return ip6_full_reass_main.is_local_reass_enabled;
2070 }
2071
2072 #endif /* CLIB_MARCH_VARIANT */
2073
2074 /*
2075  * fd.io coding-style-patch-verification: ON
2076  *
2077  * Local Variables:
2078  * eval: (c-set-style "gnu")
2079  * End:
2080  */