ip: reassembly - add a way to disable for forus
vpp.git: src/vnet/ip/reass/ip6_full_reass.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 /**
17  * @file
18  * @brief IPv6 Full Reassembly.
19  *
20  * This file contains the source code for IPv6 full reassembly.
21  */
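/*
 * Editor's note (non-normative summary): a reassembly context is looked up
 * in a 48_8 bihash keyed on source/destination address, RX FIB index,
 * fragment identification and protocol.  Contexts live in per-thread pools
 * protected by spinlocks; fragments whose context is owned by another thread
 * are handed off, and a process node periodically expires contexts that
 * exceed the configured timeout.
 */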
22
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vppinfra/bihash_48_8.h>
27 #include <vnet/ip/reass/ip6_full_reass.h>
28 #include <vnet/ip/ip6_inlines.h>
29
30 #define MSEC_PER_SEC 1000
31 #define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
32 #define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000    // 10 seconds default
33 #define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024
34 #define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
35 #define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75)
36
37 typedef enum
38 {
39   IP6_FULL_REASS_RC_OK,
40   IP6_FULL_REASS_RC_INTERNAL_ERROR,
41   IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
42   IP6_FULL_REASS_RC_NO_BUF,
43   IP6_FULL_REASS_RC_HANDOFF,
44   IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
45 } ip6_full_reass_rc_t;
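/* Editor's note: IP6_FULL_REASS_RC_HANDOFF is returned when a reassembly
 * finalizes on the memory-owner thread but must be sent out by the thread
 * which received the first fragment (see ip6_full_reass_update());
 * IP6_FULL_REASS_RC_NO_BUF signals a failed buffer-chain linearization. */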
46
47 typedef struct
48 {
49   union
50   {
51     struct
52     {
53       ip6_address_t src;
54       ip6_address_t dst;
55       u32 xx_id;
56       u32 frag_id;
57       u8 unused[7];
58       u8 proto;
59     };
60     u64 as_u64[6];
61   };
62 } ip6_full_reass_key_t;
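/* Editor's note: the 48-byte key is filled in ip6_full_reassembly_inline() -
 * as_u64[0..3] hold the source and destination addresses, as_u64[4] packs the
 * RX FIB index together with the fragment identification, and as_u64[5]
 * holds the protocol. */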
63
64 typedef union
65 {
66   struct
67   {
68     u32 reass_index;
69     u32 memory_owner_thread_index;
70   };
71   u64 as_u64;
72 } ip6_full_reass_val_t;
73
74 typedef union
75 {
76   struct
77   {
78     ip6_full_reass_key_t k;
79     ip6_full_reass_val_t v;
80   };
81   clib_bihash_kv_48_8_t kv;
82 } ip6_full_reass_kv_t;
83
84
85 always_inline u32
86 ip6_full_reass_buffer_get_data_offset (vlib_buffer_t * b)
87 {
88   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
89   return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first;
90 }
91
92 always_inline u16
93 ip6_full_reass_buffer_get_data_len (vlib_buffer_t * b)
94 {
95   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
96   return clib_min (vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) -
97     (vnb->ip.reass.fragment_first +
98      ip6_full_reass_buffer_get_data_offset (b)) + 1;
99 }
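/* Editor's note: range_first/range_last describe the part of a fragment that
 * is kept in the reassembly, while fragment_first/fragment_last describe the
 * fragment as received; the two helpers above derive the usable data offset
 * and length of a buffer from these values. */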
100
101 typedef struct
102 {
103   // hash table key
104   ip6_full_reass_key_t key;
105   // time when last packet was received
106   f64 last_heard;
107   // internal id of this reassembly
108   u64 id;
109   // buffer index of first buffer in this reassembly context
110   u32 first_bi;
111   // last octet of packet, ~0 until fragment without more_fragments arrives
112   u32 last_packet_octet;
113   // length of data collected so far
114   u32 data_len;
115   // trace operation counter
116   u32 trace_op_counter;
117   // next index - used by custom apps (~0 if not set)
118   u32 next_index;
119   // error next index - used by custom apps (~0 if not set)
120   u32 error_next_index;
121   // minimum fragment length for this reassembly - used to estimate MTU
122   u16 min_fragment_length;
123   // number of fragments for this reassembly
124   u32 fragments_n;
125   // thread owning memory for this context (whose pool contains this ctx)
126   u32 memory_owner_thread_index;
127   // thread which received fragment with offset 0 and which sends out the
128   // completed reassembly
129   u32 sendout_thread_index;
130 } ip6_full_reass_t;
131
132 typedef struct
133 {
134   ip6_full_reass_t *pool;
135   u32 reass_n;
136   u32 id_counter;
137   clib_spinlock_t lock;
138 } ip6_full_reass_per_thread_t;
139
140 typedef struct
141 {
142   // IPv6 config
143   u32 timeout_ms;
144   f64 timeout;
145   u32 expire_walk_interval_ms;
146   // maximum number of fragments in one reassembly
147   u32 max_reass_len;
148   // maximum number of reassemblies
149   u32 max_reass_n;
150
151   // IPv6 runtime
152   clib_bihash_48_8_t hash;
153
154   // per-thread data
155   ip6_full_reass_per_thread_t *per_thread_data;
156
157   // convenience
158   vlib_main_t *vlib_main;
159
160   // node index of ip6-drop node
161   u32 ip6_drop_idx;
162   u32 ip6_icmp_error_idx;
163   u32 ip6_full_reass_expire_node_idx;
164
165   /** Worker handoff */
166   u32 fq_index;
167   u32 fq_local_index;
168   u32 fq_feature_index;
169
170   // reference count for enabling/disabling feature - per interface
171   u32 *feature_use_refcount_per_intf;
172
173   // whether local fragmented packets are reassembled or not
174   int is_local_reass_enabled;
175 } ip6_full_reass_main_t;
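/* Editor's note: is_local_reass_enabled is the knob added for locally
 * destined ("for-us") traffic - when it is cleared, the
 * ip6-local-full-reassembly node drops fragments instead of reassembling
 * them (see the is_local branch in ip6_full_reassembly_inline()). */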
176
177 extern ip6_full_reass_main_t ip6_full_reass_main;
178
179 #ifndef CLIB_MARCH_VARIANT
180 ip6_full_reass_main_t ip6_full_reass_main;
181 #endif /* CLIB_MARCH_VARIANT */
182
183 typedef enum
184 {
185   IP6_FULL_REASSEMBLY_NEXT_INPUT,
186   IP6_FULL_REASSEMBLY_NEXT_DROP,
187   IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR,
188   IP6_FULL_REASSEMBLY_NEXT_HANDOFF,
189   IP6_FULL_REASSEMBLY_N_NEXT,
190 } ip6_full_reass_next_t;
191
192 typedef enum
193 {
194   RANGE_NEW,
195   RANGE_OVERLAP,
196   ICMP_ERROR_RT_EXCEEDED,
197   ICMP_ERROR_FL_TOO_BIG,
198   ICMP_ERROR_FL_NOT_MULT_8,
199   FINALIZE,
200   HANDOFF,
201   PASSTHROUGH,
202 } ip6_full_reass_trace_operation_e;
203
204 typedef struct
205 {
206   u16 range_first;
207   u16 range_last;
208   u32 range_bi;
209   i32 data_offset;
210   u32 data_len;
211   u32 first_bi;
212 } ip6_full_reass_range_trace_t;
213
214 typedef struct
215 {
216   ip6_full_reass_trace_operation_e action;
217   u32 reass_id;
218   ip6_full_reass_range_trace_t trace_range;
219   u32 op_id;
220   u32 fragment_first;
221   u32 fragment_last;
222   u32 total_data_len;
223   u32 thread_id;
224   u32 thread_id_to;
225   bool is_after_handoff;
226   ip6_header_t ip6_header;
227   ip6_frag_hdr_t ip6_frag_header;
228 } ip6_full_reass_trace_t;
229
230 static void
231 ip6_full_reass_trace_details (vlib_main_t * vm, u32 bi,
232                               ip6_full_reass_range_trace_t * trace)
233 {
234   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
235   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
236   trace->range_first = vnb->ip.reass.range_first;
237   trace->range_last = vnb->ip.reass.range_last;
238   trace->data_offset = ip6_full_reass_buffer_get_data_offset (b);
239   trace->data_len = ip6_full_reass_buffer_get_data_len (b);
240   trace->range_bi = bi;
241 }
242
243 static u8 *
244 format_ip6_full_reass_range_trace (u8 * s, va_list * args)
245 {
246   ip6_full_reass_range_trace_t *trace =
247     va_arg (*args, ip6_full_reass_range_trace_t *);
248   s =
249     format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first,
250             trace->range_last, trace->data_offset, trace->data_len,
251             trace->range_bi);
252   return s;
253 }
254
255 static u8 *
256 format_ip6_full_reass_trace (u8 * s, va_list * args)
257 {
258   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
259   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
260   ip6_full_reass_trace_t *t = va_arg (*args, ip6_full_reass_trace_t *);
261   u32 indent = 0;
262   if (~0 != t->reass_id)
263     {
264       if (t->is_after_handoff)
265         {
266           s =
267             format (s, "%U\n", format_ip6_header, &t->ip6_header,
268                     sizeof (t->ip6_header));
269           s =
270             format (s, "  %U\n", format_ip6_frag_hdr, &t->ip6_frag_header,
271                     sizeof (t->ip6_frag_header));
272           indent = 2;
273         }
274       s =
275         format (s, "%Ureass id: %u, op id: %u, ", format_white_space, indent,
276                 t->reass_id, t->op_id);
277       indent = format_get_indent (s);
278       s = format (s, "first bi: %u, data len: %u, ip/fragment[%u, %u]",
279                   t->trace_range.first_bi, t->total_data_len,
280                   t->fragment_first, t->fragment_last);
281     }
282   switch (t->action)
283     {
284     case RANGE_NEW:
285       s = format (s, "\n%Unew %U", format_white_space, indent,
286                   format_ip6_full_reass_range_trace, &t->trace_range);
287       break;
288     case RANGE_OVERLAP:
289       s = format (s, "\n%Uoverlap %U", format_white_space, indent,
290                   format_ip6_full_reass_range_trace, &t->trace_range);
291       break;
292     case ICMP_ERROR_FL_TOO_BIG:
293       s = format (s, "\n%Uicmp-error - frag_len > 65535 %U",
294                   format_white_space, indent,
295                   format_ip6_full_reass_range_trace, &t->trace_range);
296       break;
297     case ICMP_ERROR_FL_NOT_MULT_8:
298       s = format (s, "\n%Uicmp-error - frag_len mod 8 != 0 %U",
299                   format_white_space, indent,
300                   format_ip6_full_reass_range_trace, &t->trace_range);
301       break;
302     case ICMP_ERROR_RT_EXCEEDED:
303       s = format (s, "\n%Uicmp-error - reassembly time exceeded",
304                   format_white_space, indent);
305       break;
306     case FINALIZE:
307       s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
308       break;
309     case HANDOFF:
310       s =
311         format (s, "handoff from thread #%u to thread #%u", t->thread_id,
312                 t->thread_id_to);
313       break;
314     case PASSTHROUGH:
315       s = format (s, "passthrough - not a fragment");
316       break;
317     }
318   return s;
319 }
320
321 static void
322 ip6_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
323                           ip6_full_reass_t * reass, u32 bi,
324                           ip6_frag_hdr_t * ip6_frag_header,
325                           ip6_full_reass_trace_operation_e action,
326                           u32 thread_id_to)
327 {
328   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
329   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
330   bool is_after_handoff = false;
331   if (pool_is_free_index
332       (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
333     {
334       // this buffer's trace is gone
335       b->flags &= ~VLIB_BUFFER_IS_TRACED;
336       return;
337     }
338   if (vlib_buffer_get_trace_thread (b) != vm->thread_index)
339     {
340       is_after_handoff = true;
341     }
342   ip6_full_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
343   t->is_after_handoff = is_after_handoff;
344   if (t->is_after_handoff)
345     {
346       clib_memcpy (&t->ip6_header, vlib_buffer_get_current (b),
347                    clib_min (sizeof (t->ip6_header), b->current_length));
348       if (ip6_frag_header)
349         {
350           clib_memcpy (&t->ip6_frag_header, ip6_frag_header,
351                        sizeof (t->ip6_frag_header));
352         }
353       else
354         {
355           clib_memset (&t->ip6_frag_header, 0, sizeof (t->ip6_frag_header));
356         }
357     }
358   if (reass)
359     {
360       t->reass_id = reass->id;
361       t->op_id = reass->trace_op_counter;
362       t->trace_range.first_bi = reass->first_bi;
363       t->total_data_len = reass->data_len;
364       ++reass->trace_op_counter;
365     }
366   else
367     {
368       t->reass_id = ~0;
369     }
370   t->action = action;
371   t->thread_id = vm->thread_index;
372   t->thread_id_to = thread_id_to;
373   ip6_full_reass_trace_details (vm, bi, &t->trace_range);
374   t->fragment_first = vnb->ip.reass.fragment_first;
375   t->fragment_last = vnb->ip.reass.fragment_last;
376 #if 0
377   static u8 *s = NULL;
378   s = format (s, "%U", format_ip6_full_reass_trace, NULL, NULL, t);
379   printf ("%.*s\n", vec_len (s), s);
380   fflush (stdout);
381   vec_reset_length (s);
382 #endif
383 }
384
385 always_inline void
386 ip6_full_reass_free_ctx (ip6_full_reass_per_thread_t * rt,
387                          ip6_full_reass_t * reass)
388 {
389   pool_put (rt->pool, reass);
390   --rt->reass_n;
391 }
392
393 always_inline void
394 ip6_full_reass_free (ip6_full_reass_main_t * rm,
395                      ip6_full_reass_per_thread_t * rt,
396                      ip6_full_reass_t * reass)
397 {
398   clib_bihash_kv_48_8_t kv;
399   kv.key[0] = reass->key.as_u64[0];
400   kv.key[1] = reass->key.as_u64[1];
401   kv.key[2] = reass->key.as_u64[2];
402   kv.key[3] = reass->key.as_u64[3];
403   kv.key[4] = reass->key.as_u64[4];
404   kv.key[5] = reass->key.as_u64[5];
405   clib_bihash_add_del_48_8 (&rm->hash, &kv, 0);
406   ip6_full_reass_free_ctx (rt, reass);
407 }
408
409 always_inline void
410 ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
411                          ip6_full_reass_t *reass)
412 {
413   u32 range_bi = reass->first_bi;
414   vlib_buffer_t *range_b;
415   vnet_buffer_opaque_t *range_vnb;
416   u32 *to_free = NULL;
417   while (~0 != range_bi)
418     {
419       range_b = vlib_get_buffer (vm, range_bi);
420       range_vnb = vnet_buffer (range_b);
421       u32 bi = range_bi;
422       while (~0 != bi)
423         {
424           vec_add1 (to_free, bi);
425           vlib_buffer_t *b = vlib_get_buffer (vm, bi);
426           if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
427             {
428               bi = b->next_buffer;
429               b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
430             }
431           else
432             {
433               bi = ~0;
434             }
435         }
436       range_bi = range_vnb->ip.reass.next_range_bi;
437     }
438   /* send to error_next_index */
439   if (~0 != reass->error_next_index)
440     {
441       u32 n_left_to_next, *to_next, next_index;
442
443       next_index = reass->error_next_index;
444       u32 bi = ~0;
445
446       while (vec_len (to_free) > 0)
447         {
448           vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
449
450           while (vec_len (to_free) > 0 && n_left_to_next > 0)
451             {
452               bi = vec_pop (to_free);
453
454               if (~0 != bi)
455                 {
456                   to_next[0] = bi;
457                   to_next += 1;
458                   n_left_to_next -= 1;
459                 }
460             }
461           vlib_put_next_frame (vm, node, next_index, n_left_to_next);
462         }
463     }
464   else
465     {
466       vlib_buffer_free (vm, to_free, vec_len (to_free));
467     }
468   vec_free (to_free);
469 }
470
471 always_inline void
472 ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
473                            ip6_full_reass_t * reass, u32 * icmp_bi)
474 {
475   if (~0 == reass->first_bi)
476     {
477       return;
478     }
479   if (~0 == reass->next_index)  // custom apps don't want icmp
480     {
481       vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
482       if (0 == vnet_buffer (b)->ip.reass.fragment_first)
483         {
484           *icmp_bi = reass->first_bi;
485           if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
486             {
487               ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
488                                         ICMP_ERROR_RT_EXCEEDED, ~0);
489             }
490           // fragment with offset zero received - send icmp message back
491           if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
492             {
493               // separate first buffer from chain and steer it towards icmp node
494               b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
495               reass->first_bi = b->next_buffer;
496             }
497           else
498             {
499               reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
500             }
501           icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
502                                        ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
503                                        0);
504         }
505     }
506   ip6_full_reass_drop_all (vm, node, reass);
507 }
508
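/* Editor's note: returns an existing or freshly allocated context.  NULL with
 * *do_handoff set means the context is owned by another thread and the
 * fragment must be handed off; NULL without handoff means the per-thread
 * reassembly limit was reached.  With skip_bihash set (atomic fragments) no
 * hash entry is created. */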
509 always_inline ip6_full_reass_t *
510 ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
511                                ip6_full_reass_main_t *rm,
512                                ip6_full_reass_per_thread_t *rt,
513                                ip6_full_reass_kv_t *kv, u32 *icmp_bi,
514                                u8 *do_handoff, int skip_bihash)
515 {
516   ip6_full_reass_t *reass;
517   f64 now;
518
519 again:
520
521   reass = NULL;
522   now = vlib_time_now (vm);
523
524   if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
525     {
526       if (vm->thread_index != kv->v.memory_owner_thread_index)
527         {
528           *do_handoff = 1;
529           return NULL;
530         }
531
532       reass =
533         pool_elt_at_index (rm->per_thread_data
534                            [kv->v.memory_owner_thread_index].pool,
535                            kv->v.reass_index);
536
537       if (now > reass->last_heard + rm->timeout)
538         {
539           ip6_full_reass_on_timeout (vm, node, reass, icmp_bi);
540           ip6_full_reass_free (rm, rt, reass);
541           reass = NULL;
542         }
543     }
544
545   if (reass)
546     {
547       reass->last_heard = now;
548       return reass;
549     }
550
551   if (rt->reass_n >= rm->max_reass_n)
552     {
553       reass = NULL;
554       return reass;
555     }
556   else
557     {
558       pool_get (rt->pool, reass);
559       clib_memset (reass, 0, sizeof (*reass));
560       reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
561       ++rt->id_counter;
562       reass->first_bi = ~0;
563       reass->last_packet_octet = ~0;
564       reass->data_len = 0;
565       reass->next_index = ~0;
566       reass->error_next_index = ~0;
567       reass->memory_owner_thread_index = vm->thread_index;
568       ++rt->reass_n;
569     }
570
571   kv->v.reass_index = (reass - rt->pool);
572   kv->v.memory_owner_thread_index = vm->thread_index;
573   reass->last_heard = now;
574
575   if (!skip_bihash)
576     {
577       reass->key.as_u64[0] = kv->kv.key[0];
578       reass->key.as_u64[1] = kv->kv.key[1];
579       reass->key.as_u64[2] = kv->kv.key[2];
580       reass->key.as_u64[3] = kv->kv.key[3];
581       reass->key.as_u64[4] = kv->kv.key[4];
582       reass->key.as_u64[5] = kv->kv.key[5];
583
584       int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
585       if (rv)
586         {
587           ip6_full_reass_free (rm, rt, reass);
588           reass = NULL;
589           // if another worker already created a context, work with that copy
590           if (-2 == rv)
591             goto again;
592         }
593     }
594   else
595     {
596       reass->key.as_u64[0] = ~0;
597       reass->key.as_u64[1] = ~0;
598       reass->key.as_u64[2] = ~0;
599       reass->key.as_u64[3] = ~0;
600       reass->key.as_u64[4] = ~0;
601       reass->key.as_u64[5] = ~0;
602     }
603
604   return reass;
605 }
606
607 always_inline ip6_full_reass_rc_t
608 ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
609                          ip6_full_reass_main_t * rm,
610                          ip6_full_reass_per_thread_t * rt,
611                          ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
612                          u32 * error0, bool is_custom_app)
613 {
614   *bi0 = reass->first_bi;
615   *error0 = IP6_ERROR_NONE;
616   ip6_frag_hdr_t *frag_hdr;
617   vlib_buffer_t *last_b = NULL;
618   u32 sub_chain_bi = reass->first_bi;
619   u32 total_length = 0;
620   u32 buf_cnt = 0;
621   u32 dropped_cnt = 0;
622   u32 *vec_drop_compress = NULL;
623   ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
624   do
625     {
626       u32 tmp_bi = sub_chain_bi;
627       vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi);
628       vnet_buffer_opaque_t *vnb = vnet_buffer (tmp);
629       if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) &&
630           !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first))
631         {
632           rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
633           goto free_buffers_and_return;
634         }
635
636       u32 data_len = ip6_full_reass_buffer_get_data_len (tmp);
637       u32 trim_front = vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset +
638         sizeof (*frag_hdr) + ip6_full_reass_buffer_get_data_offset (tmp);
639       u32 trim_end =
640         vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len;
641       if (tmp_bi == reass->first_bi)
642         {
643           /* first buffer - keep ip6 header */
644           if (0 != ip6_full_reass_buffer_get_data_offset (tmp))
645             {
646               rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
647               goto free_buffers_and_return;
648             }
649           trim_front = 0;
650           trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len -
651             (vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset +
652              sizeof (*frag_hdr));
653           if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0))
654             {
655               rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
656               goto free_buffers_and_return;
657             }
658         }
659       u32 keep_data =
660         vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
661       while (1)
662         {
663           ++buf_cnt;
664           if (trim_front)
665             {
666               if (trim_front > tmp->current_length)
667                 {
668                   /* drop whole buffer */
669                   vec_add1 (vec_drop_compress, tmp_bi);
670                   trim_front -= tmp->current_length;
671                   if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
672                     {
673                       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
674                       goto free_buffers_and_return;
675                     }
676                   tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
677                   tmp_bi = tmp->next_buffer;
678                   tmp = vlib_get_buffer (vm, tmp_bi);
679                   continue;
680                 }
681               else
682                 {
683                   vlib_buffer_advance (tmp, trim_front);
684                   trim_front = 0;
685                 }
686             }
687           if (keep_data)
688             {
689               if (last_b)
690                 {
691                   last_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
692                   last_b->next_buffer = tmp_bi;
693                 }
694               last_b = tmp;
695               if (keep_data <= tmp->current_length)
696                 {
697                   tmp->current_length = keep_data;
698                   keep_data = 0;
699                 }
700               else
701                 {
702                   keep_data -= tmp->current_length;
703                   if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
704                     {
705                       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
706                       goto free_buffers_and_return;
707                     }
708                 }
709               total_length += tmp->current_length;
710             }
711           else
712             {
713               vec_add1 (vec_drop_compress, tmp_bi);
714               if (reass->first_bi == tmp_bi)
715                 {
716                   rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
717                   goto free_buffers_and_return;
718                 }
719               ++dropped_cnt;
720             }
721           if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
722             {
723               tmp_bi = tmp->next_buffer;
724               tmp = vlib_get_buffer (vm, tmp->next_buffer);
725             }
726           else
727             {
728               break;
729             }
730         }
731       sub_chain_bi =
732         vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip.
733         reass.next_range_bi;
734     }
735   while (~0 != sub_chain_bi);
736
737   if (!last_b)
738     {
739       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
740       goto free_buffers_and_return;
741     }
742   last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
743   vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
744   if (total_length < first_b->current_length)
745     {
746       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
747       goto free_buffers_and_return;
748     }
749   total_length -= first_b->current_length;
750   first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
751   first_b->total_length_not_including_first_buffer = total_length;
752   // drop fragment header
753   vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
754   ip6_header_t *ip = vlib_buffer_get_current (first_b);
755   u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
756   ip6_ext_hdr_chain_t hdr_chain;
757   ip6_ext_header_t *prev_hdr = 0;
758   int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
759                                  &hdr_chain);
760   if (res < 0 ||
761       (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
762     {
763       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
764       goto free_buffers_and_return;
765     }
766   frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
767   if (res > 0)
768     {
769       prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
770       prev_hdr->next_hdr = frag_hdr->next_hdr;
771     }
772   else
773     {
774       ip->protocol = frag_hdr->next_hdr;
775     }
776   if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
777     {
778       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
779       goto free_buffers_and_return;
780     }
781   memmove (frag_hdr, (u8 *) frag_hdr + sizeof (*frag_hdr),
782            first_b->current_length - ip6_frag_hdr_offset -
783            sizeof (ip6_frag_hdr_t));
784   first_b->current_length -= sizeof (*frag_hdr);
785   ip->payload_length =
786     clib_host_to_net_u16 (total_length + first_b->current_length -
787                           sizeof (*ip));
788   if (!vlib_buffer_chain_linearize (vm, first_b))
789     {
790       rv = IP6_FULL_REASS_RC_NO_BUF;
791       goto free_buffers_and_return;
792     }
793   first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
794   if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
795     {
796       ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
797                                 FINALIZE, ~0);
798 #if 0
799       // following code does a hexdump of packet fragments to stdout ...
800       do
801         {
802           u32 bi = reass->first_bi;
803           u8 *s = NULL;
804           while (~0 != bi)
805             {
806               vlib_buffer_t *b = vlib_get_buffer (vm, bi);
807               s = format (s, "%u: %U\n", bi, format_hexdump,
808                           vlib_buffer_get_current (b), b->current_length);
809               if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
810                 {
811                   bi = b->next_buffer;
812                 }
813               else
814                 {
815                   break;
816                 }
817             }
818           printf ("%.*s\n", vec_len (s), s);
819           fflush (stdout);
820           vec_free (s);
821         }
822       while (0);
823 #endif
824     }
825   if (!is_custom_app)
826     {
827       *next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
828     }
829   else
830     {
831       *next0 = reass->next_index;
832     }
833   vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
834   ip6_full_reass_free (rm, rt, reass);
835   reass = NULL;
836 free_buffers_and_return:
837   vlib_buffer_free (vm, vec_drop_compress, vec_len (vec_drop_compress));
838   vec_free (vec_drop_compress);
839   return rv;
840 }
841
842 always_inline void
843 ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
844                                       ip6_full_reass_t * reass,
845                                       u32 prev_range_bi, u32 new_next_bi)
846 {
847
848   vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi);
849   vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b);
850   if (~0 != prev_range_bi)
851     {
852       vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_range_bi);
853       vnet_buffer_opaque_t *prev_vnb = vnet_buffer (prev_b);
854       new_next_vnb->ip.reass.next_range_bi = prev_vnb->ip.reass.next_range_bi;
855       prev_vnb->ip.reass.next_range_bi = new_next_bi;
856     }
857   else
858     {
859       if (~0 != reass->first_bi)
860         {
861           new_next_vnb->ip.reass.next_range_bi = reass->first_bi;
862         }
863       reass->first_bi = new_next_bi;
864     }
865   reass->data_len += ip6_full_reass_buffer_get_data_len (new_next_b);
866 }
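/* Editor's note: ranges form a singly linked list ordered by fragment offset,
 * chained through vnet_buffer()->ip.reass.next_range_bi with reass->first_bi
 * as the head; data_len accumulates the bytes kept so far and is compared
 * against last_packet_octet + 1 to detect completion. */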
867
868 always_inline ip6_full_reass_rc_t
869 ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
870                        ip6_full_reass_main_t *rm,
871                        ip6_full_reass_per_thread_t *rt,
872                        ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
873                        u32 *error0, ip6_frag_hdr_t *frag_hdr,
874                        bool is_custom_app, u32 *handoff_thread_idx,
875                        int skip_bihash)
876 {
877   int consumed = 0;
878   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
879   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
880   if (is_custom_app)
881     {
882       reass->next_index = fvnb->ip.reass.next_index;    // store next_index before it's overwritten
883       reass->error_next_index = fvnb->ip.reass.error_next_index;        // store error_next_index before it is overwritten
884     }
885
886   fvnb->ip.reass.ip6_frag_hdr_offset =
887     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
888   ip6_header_t *fip = vlib_buffer_get_current (fb);
889   if (fb->current_length < sizeof (*fip) ||
890       fvnb->ip.reass.ip6_frag_hdr_offset == 0 ||
891       fvnb->ip.reass.ip6_frag_hdr_offset >= fb->current_length)
892     {
893       return IP6_FULL_REASS_RC_INTERNAL_ERROR;
894     }
895
896   u32 fragment_first = fvnb->ip.reass.fragment_first =
897     ip6_frag_hdr_offset_bytes (frag_hdr);
898   u32 fragment_length =
899     vlib_buffer_length_in_chain (vm, fb) -
900     (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
901   if (0 == fragment_length)
902     {
903       return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
904     }
905   u32 fragment_last = fvnb->ip.reass.fragment_last =
906     fragment_first + fragment_length - 1;
907   int more_fragments = ip6_frag_hdr_more (frag_hdr);
908   u32 candidate_range_bi = reass->first_bi;
909   u32 prev_range_bi = ~0;
910   fvnb->ip.reass.range_first = fragment_first;
911   fvnb->ip.reass.range_last = fragment_last;
912   fvnb->ip.reass.next_range_bi = ~0;
913   if (!more_fragments)
914     {
915       reass->last_packet_octet = fragment_last;
916     }
917   if (~0 == reass->first_bi)
918     {
919       // starting a new reassembly
920       ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
921       reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length);
922       consumed = 1;
923       reass->fragments_n = 1;
924       goto check_if_done_maybe;
925     }
926   reass->min_fragment_length =
927     clib_min (clib_net_to_host_u16 (fip->payload_length),
928               fvnb->ip.reass.estimated_mtu);
929   while (~0 != candidate_range_bi)
930     {
931       vlib_buffer_t *candidate_b = vlib_get_buffer (vm, candidate_range_bi);
932       vnet_buffer_opaque_t *candidate_vnb = vnet_buffer (candidate_b);
933       if (fragment_first > candidate_vnb->ip.reass.range_last)
934         {
935           // this fragment starts after the candidate range
936           prev_range_bi = candidate_range_bi;
937           candidate_range_bi = candidate_vnb->ip.reass.next_range_bi;
938           if (candidate_vnb->ip.reass.range_last < fragment_last &&
939               ~0 == candidate_range_bi)
940             {
941               // special case - this fragment falls beyond all known ranges
942               ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
943                                                     *bi0);
944               consumed = 1;
945               break;
946             }
947           continue;
948         }
949       if (fragment_last < candidate_vnb->ip.reass.range_first)
950         {
951           // this fragment ends before candidate range without any overlap
952           ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
953                                                 *bi0);
954           consumed = 1;
955         }
956       else if (fragment_first == candidate_vnb->ip.reass.range_first &&
957                fragment_last == candidate_vnb->ip.reass.range_last)
958         {
959           // duplicate fragment - ignore
960         }
961       else
962         {
963           // overlapping fragment - not allowed by RFC 8200
964           if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
965             {
966               ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr,
967                                         RANGE_OVERLAP, ~0);
968             }
969           ip6_full_reass_drop_all (vm, node, reass);
970           ip6_full_reass_free (rm, rt, reass);
971           *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
972           *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
973           return IP6_FULL_REASS_RC_OK;
974         }
975       break;
976     }
977   ++reass->fragments_n;
978 check_if_done_maybe:
979   if (consumed)
980     {
981       if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
982         {
983           ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr, RANGE_NEW,
984                                     ~0);
985         }
986     }
987   else if (skip_bihash)
988     {
989       // if this reassembly is not in bihash, then the packet must have been
990       // consumed
991       return IP6_FULL_REASS_RC_INTERNAL_ERROR;
992     }
993   if (~0 != reass->last_packet_octet &&
994       reass->data_len == reass->last_packet_octet + 1)
995     {
996       *handoff_thread_idx = reass->sendout_thread_index;
997       int handoff =
998         reass->memory_owner_thread_index != reass->sendout_thread_index;
999       ip6_full_reass_rc_t rc =
1000         ip6_full_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
1001                                  is_custom_app);
1002       if (IP6_FULL_REASS_RC_OK == rc && handoff)
1003         {
1004           return IP6_FULL_REASS_RC_HANDOFF;
1005         }
1006       return rc;
1007     }
1008   else
1009     {
1010       if (skip_bihash)
1011         {
1012           // if this reassembly is not in bihash, it should've been an atomic
1013           // fragment and thus finalized
1014           return IP6_FULL_REASS_RC_INTERNAL_ERROR;
1015         }
1016       if (consumed)
1017         {
1018           *bi0 = ~0;
1019           if (reass->fragments_n > rm->max_reass_len)
1020             {
1021               return IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS;
1022             }
1023         }
1024       else
1025         {
1026           *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1027           *error0 = IP6_ERROR_REASS_DUPLICATE_FRAGMENT;
1028         }
1029     }
1030   return IP6_FULL_REASS_RC_OK;
1031 }
1032
1033 always_inline bool
1034 ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
1035                                            vlib_buffer_t *b,
1036                                            ip6_ext_hdr_chain_t *hc)
1037 {
1038   int nh = hc->eh[hc->length - 1].protocol;
1039   /* error if the last next-header is still an extension header, i.e. no terminating upper-layer header is present */
1040   if (ip6_ext_hdr (nh))
1041     {
1042       icmp6_error_set_vnet_buffer (
1043         b, ICMP6_parameter_problem,
1044         ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
1045       b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
1046       return false;
1047     }
1048   return true;
1049 }
1050
1051 always_inline bool
1052 ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
1053                                            vlib_buffer_t * b,
1054                                            ip6_frag_hdr_t * frag_hdr)
1055 {
1056   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1057   ip6_header_t *ip = vlib_buffer_get_current (b);
1058   int more_fragments = ip6_frag_hdr_more (frag_hdr);
1059   u32 fragment_length =
1060     vlib_buffer_length_in_chain (vm, b) -
1061     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
1062   if (more_fragments && 0 != fragment_length % 8)
1063     {
1064       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
1065                                    ICMP6_parameter_problem_erroneous_header_field,
1066                                    (u8 *) & ip->payload_length - (u8 *) ip);
1067       return false;
1068     }
1069   return true;
1070 }
1071
1072 always_inline bool
1073 ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
1074                                           vlib_buffer_t * b,
1075                                           ip6_frag_hdr_t * frag_hdr)
1076 {
1077   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1078   u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
1079   u32 fragment_length =
1080     vlib_buffer_length_in_chain (vm, b) -
1081     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
1082   if (fragment_first + fragment_length > 65535)
1083     {
1084       ip6_header_t *ip0 = vlib_buffer_get_current (b);
1085       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
1086                                    ICMP6_parameter_problem_erroneous_header_field,
1087                                    (u8 *) & frag_hdr->fragment_offset_and_more
1088                                    - (u8 *) ip0);
1089       return false;
1090     }
1091   return true;
1092 }
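/* Editor's note: the three checks above mirror the IPv6 fragment sanity rules
 * (first fragment must carry an upper-layer header, non-last fragments must
 * be a multiple of 8 octets, reassembled size must not exceed 65535) and
 * queue an ICMP parameter-problem error towards the sender when violated. */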
1093
1094 always_inline uword
1095 ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1096                             vlib_frame_t *frame, bool is_feature,
1097                             bool is_custom_app, bool is_local)
1098 {
1099   u32 *from = vlib_frame_vector_args (frame);
1100   u32 n_left_from, n_left_to_next, *to_next, next_index;
1101   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1102   ip6_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
1103   clib_spinlock_lock (&rt->lock);
1104
1105   n_left_from = frame->n_vectors;
1106   next_index = node->cached_next_index;
1107   while (n_left_from > 0)
1108     {
1109       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1110
1111       while (n_left_from > 0 && n_left_to_next > 0)
1112         {
1113           u32 bi0;
1114           vlib_buffer_t *b0;
1115           u32 next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1116           u32 error0 = IP6_ERROR_NONE;
1117           u32 icmp_bi = ~0;
1118
1119           bi0 = from[0];
1120           b0 = vlib_get_buffer (vm, bi0);
1121
1122           ip6_header_t *ip0 = vlib_buffer_get_current (b0);
1123           ip6_frag_hdr_t *frag_hdr;
1124           ip6_ext_hdr_chain_t hdr_chain;
1125           int res = ip6_ext_header_walk (
1126             b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
1127           if (res < 0 ||
1128               hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
1129             {
1130               // no fragment header found - not a fragment or a mangled packet
1131               next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1132               ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
1133                                         ~0);
1134               goto skip_reass;
1135             }
1136           if (is_local && !rm->is_local_reass_enabled)
1137             {
1138               next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1139               goto skip_reass;
1140             }
1141           frag_hdr =
1142             ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
1143           vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
1144             hdr_chain.eh[res].offset;
1145
1146           if (0 == ip6_frag_hdr_offset (frag_hdr))
1147             {
1148               // first fragment - verify upper-layer is present
1149               if (!ip6_full_reass_verify_upper_layer_present (node, b0,
1150                                                               &hdr_chain))
1151                 {
1152                   next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1153                   goto skip_reass;
1154                 }
1155             }
1156           if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
1157               !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
1158             {
1159               next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1160               goto skip_reass;
1161             }
1162
1163           int skip_bihash = 0;
1164           ip6_full_reass_kv_t kv;
1165           u8 do_handoff = 0;
1166
1167           if (0 == ip6_frag_hdr_offset (frag_hdr) &&
1168               !ip6_frag_hdr_more (frag_hdr))
1169             {
1170               // this is an atomic fragment and needs to be processed separately
1171               skip_bihash = 1;
1172             }
1173           else
1174             {
1175               kv.k.as_u64[0] = ip0->src_address.as_u64[0];
1176               kv.k.as_u64[1] = ip0->src_address.as_u64[1];
1177               kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
1178               kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
1179               kv.k.as_u64[4] =
1180                 ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
1181                                 vnet_buffer (b0)->sw_if_index[VLIB_RX]))
1182                   << 32 |
1183                 (u64) frag_hdr->identification;
1184               kv.k.as_u64[5] = ip0->protocol;
1185             }
1186
1187           ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
1188             vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
1189
1190           if (reass)
1191             {
1192               const u32 fragment_first = ip6_frag_hdr_offset (frag_hdr);
1193               if (0 == fragment_first)
1194                 {
1195                   reass->sendout_thread_index = vm->thread_index;
1196                 }
1197             }
1198           if (PREDICT_FALSE (do_handoff))
1199             {
1200               next0 = IP6_FULL_REASSEMBLY_NEXT_HANDOFF;
1201               vnet_buffer (b0)->ip.reass.owner_thread_index =
1202                 kv.v.memory_owner_thread_index;
1203             }
1204           else if (reass)
1205             {
1206               u32 handoff_thread_idx;
1207               u32 counter = ~0;
1208               switch (ip6_full_reass_update (
1209                 vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
1210                 is_custom_app, &handoff_thread_idx, skip_bihash))
1211                 {
1212                 case IP6_FULL_REASS_RC_OK:
1213                   /* nothing to do here */
1214                   break;
1215                 case IP6_FULL_REASS_RC_HANDOFF:
1216                   next0 = IP6_FULL_REASSEMBLY_NEXT_HANDOFF;
1217                   b0 = vlib_get_buffer (vm, bi0);
1218                   vnet_buffer (b0)->ip.reass.owner_thread_index =
1219                     handoff_thread_idx;
1220                   break;
1221                 case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
1222                   counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
1223                   break;
1224                 case IP6_FULL_REASS_RC_NO_BUF:
1225                   counter = IP6_ERROR_REASS_NO_BUF;
1226                   break;
1227                 case IP6_FULL_REASS_RC_INTERNAL_ERROR:
1228                   counter = IP6_ERROR_REASS_INTERNAL_ERROR;
1229                   break;
1230                 case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
1231                   counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
1232                   break;
1233                 }
1234               if (~0 != counter)
1235                 {
1236                   vlib_node_increment_counter (vm, node->node_index, counter,
1237                                                1);
1238                   ip6_full_reass_drop_all (vm, node, reass);
1239                   ip6_full_reass_free (rm, rt, reass);
1240                   goto next_packet;
1241                 }
1242             }
1243           else
1244             {
1245               if (is_feature)
1246                 {
1247                   next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
1248                 }
1249               else
1250                 {
1251                   vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
1252                   next0 = fvnb->ip.reass.error_next_index;
1253                 }
1254               error0 = IP6_ERROR_REASS_LIMIT_REACHED;
1255             }
1256
1257           if (~0 != bi0)
1258             {
1259             skip_reass:
1260               to_next[0] = bi0;
1261               to_next += 1;
1262               n_left_to_next -= 1;
1263
1264               /* bi0 might have been updated by reass_finalize, reload */
1265               b0 = vlib_get_buffer (vm, bi0);
1266               if (IP6_ERROR_NONE != error0)
1267                 {
1268                   b0->error = node->errors[error0];
1269                 }
1270
1271               if (next0 == IP6_FULL_REASSEMBLY_NEXT_HANDOFF)
1272                 {
1273                   if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1274                     {
1275                       ip6_full_reass_add_trace (
1276                         vm, node, NULL, bi0, frag_hdr, HANDOFF,
1277                         vnet_buffer (b0)->ip.reass.owner_thread_index);
1278                     }
1279                 }
1280               else if (is_feature && IP6_ERROR_NONE == error0)
1281                 {
1282                   vnet_feature_next (&next0, b0);
1283                 }
1284               vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1285                                                n_left_to_next, bi0, next0);
1286             }
1287
1288           if (~0 != icmp_bi)
1289             {
1290               next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
1291               to_next[0] = icmp_bi;
1292               to_next += 1;
1293               n_left_to_next -= 1;
1294               vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1295                                                n_left_to_next, icmp_bi,
1296                                                next0);
1297             }
1298         next_packet:
1299           from += 1;
1300           n_left_from -= 1;
1301         }
1302
1303       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1304     }
1305
1306   clib_spinlock_unlock (&rt->lock);
1307   return frame->n_vectors;
1308 }
1309
1310 static char *ip6_full_reassembly_error_strings[] = {
1311 #define _(sym, string) string,
1312   foreach_ip6_error
1313 #undef _
1314 };
1315
1316 VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
1317                                     vlib_node_runtime_t * node,
1318                                     vlib_frame_t * frame)
1319 {
1320   return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
1321                                      false /* is_custom_app */,
1322                                      false /* is_local */);
1323 }
1324
1325 VLIB_REGISTER_NODE (ip6_full_reass_node) = {
1326     .name = "ip6-full-reassembly",
1327     .vector_size = sizeof (u32),
1328     .format_trace = format_ip6_full_reass_trace,
1329     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1330     .error_strings = ip6_full_reassembly_error_strings,
1331     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1332     .next_nodes =
1333         {
1334                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1335                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1336                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1337                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reassembly-handoff",
1338         },
1339 };
1340
1341 VLIB_NODE_FN (ip6_local_full_reass_node)
1342 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1343 {
1344   return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
1345                                      false /* is_custom_app */,
1346                                      true /* is_local */);
1347 }
1348
1349 VLIB_REGISTER_NODE (ip6_local_full_reass_node) = {
1350     .name = "ip6-local-full-reassembly",
1351     .vector_size = sizeof (u32),
1352     .format_trace = format_ip6_full_reass_trace,
1353     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1354     .error_strings = ip6_full_reassembly_error_strings,
1355     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1356     .next_nodes =
1357         {
1358                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1359                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1360                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1361                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff",
1362         },
1363 };
1364
1365 VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm,
1366                                             vlib_node_runtime_t * node,
1367                                             vlib_frame_t * frame)
1368 {
1369   return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */,
1370                                      false /* is_custom_app */,
1371                                      false /* is_local */);
1372 }
1373
1374 VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
1375     .name = "ip6-full-reassembly-feature",
1376     .vector_size = sizeof (u32),
1377     .format_trace = format_ip6_full_reass_trace,
1378     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1379     .error_strings = ip6_full_reassembly_error_strings,
1380     .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
1381     .next_nodes =
1382         {
1383                 [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
1384                 [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
1385                 [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
1386                 [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-feature-hoff",
1387         },
1388 };
1389
1390 VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
1391     .arc_name = "ip6-unicast",
1392     .node_name = "ip6-full-reassembly-feature",
1393     .runs_before = VNET_FEATURES ("ip6-lookup",
1394                                   "ipsec6-input-feature"),
1395     .runs_after = 0,
1396 };
1397
1398 #ifndef CLIB_MARCH_VARIANT
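/* Editor's note: worked example with the defaults above - 1024 reassemblies
 * divided by the 0.75 load factor gives 1365, which the loop below rounds up
 * to the next power of two, i.e. 2048 buckets. */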
1399 static u32
1400 ip6_full_reass_get_nbuckets ()
1401 {
1402   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1403   u32 nbuckets;
1404   u8 i;
1405
1406   nbuckets = (u32) (rm->max_reass_n / IP6_FULL_REASS_HT_LOAD_FACTOR);
1407
1408   for (i = 0; i < 31; i++)
1409     if ((1 << i) >= nbuckets)
1410       break;
1411   nbuckets = 1 << i;
1412
1413   return nbuckets;
1414 }
1415 #endif /* CLIB_MARCH_VARIANT */
1416
1417 typedef enum
1418 {
1419   IP6_EVENT_CONFIG_CHANGED = 1,
1420 } ip6_full_reass_event_t;
1421
1422 #ifndef CLIB_MARCH_VARIANT
1423 typedef struct
1424 {
1425   int failure;
1426   clib_bihash_48_8_t *new_hash;
1427 } ip6_rehash_cb_ctx;
1428
1429 static int
1430 ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx)
1431 {
1432   ip6_rehash_cb_ctx *ctx = _ctx;
1433   if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1))
1434     {
1435       ctx->failure = 1;
1436     }
1437   return (BIHASH_WALK_CONTINUE);
1438 }
1439
1440 static void
1441 ip6_full_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
1442                            u32 max_reassembly_length,
1443                            u32 expire_walk_interval_ms)
1444 {
1445   ip6_full_reass_main.timeout_ms = timeout_ms;
1446   ip6_full_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
1447   ip6_full_reass_main.max_reass_n = max_reassemblies;
1448   ip6_full_reass_main.max_reass_len = max_reassembly_length;
1449   ip6_full_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
1450 }
1451
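/* Editor's note: applies new parameters and wakes the expire-walk process; if
 * the bucket count implied by max_reassemblies grows, existing entries are
 * copied into a larger bihash via ip6_rehash_cb() and the old table is
 * freed. */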
1452 vnet_api_error_t
1453 ip6_full_reass_set (u32 timeout_ms, u32 max_reassemblies,
1454                     u32 max_reassembly_length, u32 expire_walk_interval_ms)
1455 {
1456   u32 old_nbuckets = ip6_full_reass_get_nbuckets ();
1457   ip6_full_reass_set_params (timeout_ms, max_reassemblies,
1458                              max_reassembly_length, expire_walk_interval_ms);
1459   vlib_process_signal_event (ip6_full_reass_main.vlib_main,
1460                              ip6_full_reass_main.ip6_full_reass_expire_node_idx,
1461                              IP6_EVENT_CONFIG_CHANGED, 0);
1462   u32 new_nbuckets = ip6_full_reass_get_nbuckets ();
1463   if (ip6_full_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
1464     {
1465       clib_bihash_48_8_t new_hash;
1466       clib_memset (&new_hash, 0, sizeof (new_hash));
1467       ip6_rehash_cb_ctx ctx;
1468       ctx.failure = 0;
1469       ctx.new_hash = &new_hash;
1470       clib_bihash_init_48_8 (&new_hash, "ip6-full-reass", new_nbuckets,
1471                              new_nbuckets * 1024);
1472       clib_bihash_foreach_key_value_pair_48_8 (&ip6_full_reass_main.hash,
1473                                                ip6_rehash_cb, &ctx);
1474       if (ctx.failure)
1475         {
1476           clib_bihash_free_48_8 (&new_hash);
1477           return -1;
1478         }
1479       else
1480         {
1481           clib_bihash_free_48_8 (&ip6_full_reass_main.hash);
1482           clib_memcpy_fast (&ip6_full_reass_main.hash, &new_hash,
1483                             sizeof (ip6_full_reass_main.hash));
1484           clib_bihash_copied (&ip6_full_reass_main.hash, &new_hash);
1485         }
1486     }
1487   return 0;
1488 }
1489
1490 vnet_api_error_t
1491 ip6_full_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
1492                     u32 * max_reassembly_length,
1493                     u32 * expire_walk_interval_ms)
1494 {
1495   *timeout_ms = ip6_full_reass_main.timeout_ms;
1496   *max_reassemblies = ip6_full_reass_main.max_reass_n;
1497   *max_reassembly_length = ip6_full_reass_main.max_reass_len;
1498   *expire_walk_interval_ms = ip6_full_reass_main.expire_walk_interval_ms;
1499   return 0;
1500 }
1501
1502 static clib_error_t *
1503 ip6_full_reass_init_function (vlib_main_t * vm)
1504 {
1505   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1506   clib_error_t *error = 0;
1507   u32 nbuckets;
1508   vlib_node_t *node;
1509
1510   rm->vlib_main = vm;
1511
1512   vec_validate (rm->per_thread_data, vlib_num_workers ());
1513   ip6_full_reass_per_thread_t *rt;
1514   vec_foreach (rt, rm->per_thread_data)
1515   {
1516     clib_spinlock_init (&rt->lock);
1517     pool_alloc (rt->pool, rm->max_reass_n);
1518   }
1519
1520   node = vlib_get_node_by_name (vm, (u8 *) "ip6-full-reassembly-expire-walk");
1521   ASSERT (node);
1522   rm->ip6_full_reass_expire_node_idx = node->index;
1523
1524   ip6_full_reass_set_params (IP6_FULL_REASS_TIMEOUT_DEFAULT_MS,
1525                              IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT,
1526                              IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
1527                              IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
1528
1529   nbuckets = ip6_full_reass_get_nbuckets ();
1530   clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets,
1531                          nbuckets * 1024);
1532
1533   node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
1534   ASSERT (node);
1535   rm->ip6_drop_idx = node->index;
1536   node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
1537   ASSERT (node);
1538   rm->ip6_icmp_error_idx = node->index;
1539
1540   if ((error = vlib_call_init_function (vm, ip_main_init)))
1541     return error;
1542   ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
1543                          ip6_local_full_reass_node.index);
1544   rm->is_local_reass_enabled = 1;
1545
1546   rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0);
1547   rm->fq_local_index =
1548     vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0);
1549   rm->fq_feature_index =
1550     vlib_frame_queue_main_init (ip6_full_reass_node_feature.index, 0);
1551
1552   rm->feature_use_refcount_per_intf = NULL;
1553   return error;
1554 }
1555
1556 VLIB_INIT_FUNCTION (ip6_full_reass_init_function);
1557 #endif /* CLIB_MARCH_VARIANT */
1558
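/**
 * @brief Process node body: wakes up every expire_walk_interval_ms (or when
 * the configuration changes), walks each per-thread reassembly pool under
 * its spinlock and frees contexts whose last fragment arrived more than the
 * configured timeout ago, forwarding any ICMP time-exceeded buffers
 * generated on timeout to the ip6-icmp-error node.
 */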
1559 static uword
1560 ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
1561                              CLIB_UNUSED (vlib_frame_t *f))
1562 {
1563   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1564   uword event_type, *event_data = 0;
1565
1566   while (true)
1567     {
1568       vlib_process_wait_for_event_or_clock (vm,
1569                                             (f64) rm->expire_walk_interval_ms
1570                                             / (f64) MSEC_PER_SEC);
1571       event_type = vlib_process_get_events (vm, &event_data);
1572
1573       switch (event_type)
1574         {
1575         case ~0:
1576           /* no events => timeout */
1577           /* fallthrough */
1578         case IP6_EVENT_CONFIG_CHANGED:
1579           /* nothing to do here */
1580           break;
1581         default:
1582           clib_warning ("BUG: event type 0x%wx", event_type);
1583           break;
1584         }
1585       f64 now = vlib_time_now (vm);
1586
1587       ip6_full_reass_t *reass;
1588       int *pool_indexes_to_free = NULL;
1589
1590       uword thread_index = 0;
1591       int index;
1592       const uword nthreads = vlib_num_workers () + 1;
1593       u32 *vec_icmp_bi = NULL;
1594       for (thread_index = 0; thread_index < nthreads; ++thread_index)
1595         {
1596           ip6_full_reass_per_thread_t *rt =
1597             &rm->per_thread_data[thread_index];
1598           clib_spinlock_lock (&rt->lock);
1599
1600           vec_reset_length (pool_indexes_to_free);
1601           pool_foreach_index (index, rt->pool)  {
1602             reass = pool_elt_at_index (rt->pool, index);
1603             if (now > reass->last_heard + rm->timeout)
1604               {
1605                 vec_add1 (pool_indexes_to_free, index);
1606               }
1607           }
1608           int *i;
1609           vec_foreach (i, pool_indexes_to_free)
1610           {
1611             ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1612             u32 icmp_bi = ~0;
1613             ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi);
1614             if (~0 != icmp_bi)
1615               vec_add1 (vec_icmp_bi, icmp_bi);
1616
1617             ip6_full_reass_free (rm, rt, reass);
1618           }
1619
1620           clib_spinlock_unlock (&rt->lock);
1621         }
1622
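      /* Drain the ICMP buffers collected above into the ip6-icmp-error
         node one frame at a time, marking the frame for tracing if any
         buffer in it is traced. */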
1623       while (vec_len (vec_icmp_bi) > 0)
1624         {
1625           vlib_frame_t *f =
1626             vlib_get_frame_to_node (vm, rm->ip6_icmp_error_idx);
1627           u32 *to_next = vlib_frame_vector_args (f);
1628           u32 n_left_to_next = VLIB_FRAME_SIZE - f->n_vectors;
1629           int trace_frame = 0;
1630           while (vec_len (vec_icmp_bi) > 0 && n_left_to_next > 0)
1631             {
1632               u32 bi = vec_pop (vec_icmp_bi);
1633               vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1634               if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
1635                 trace_frame = 1;
1636               b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
1637               to_next[0] = bi;
1638               ++f->n_vectors;
1639               to_next += 1;
1640               n_left_to_next -= 1;
1641             }
1642           f->frame_flags |= (trace_frame * VLIB_FRAME_TRACE);
1643           vlib_put_frame_to_node (vm, rm->ip6_icmp_error_idx, f);
1644         }
1645
1646       vec_free (pool_indexes_to_free);
1647       vec_free (vec_icmp_bi);
1648       if (event_data)
1649         {
1650           _vec_len (event_data) = 0;
1651         }
1652     }
1653
1654   return 0;
1655 }
1656
1657 VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
1658     .function = ip6_full_reass_walk_expired,
1659     .format_trace = format_ip6_full_reass_trace,
1660     .type = VLIB_NODE_TYPE_PROCESS,
1661     .name = "ip6-full-reassembly-expire-walk",
1662
1663     .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
1664     .error_strings = ip6_full_reassembly_error_strings,
1665
1666 };
1667
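/** @brief Format a reassembly hash key for trace and CLI output. */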
1668 static u8 *
1669 format_ip6_full_reass_key (u8 * s, va_list * args)
1670 {
1671   ip6_full_reass_key_t *key = va_arg (*args, ip6_full_reass_key_t *);
1672   s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
1673               key->xx_id, format_ip6_address, &key->src, format_ip6_address,
1674               &key->dst, clib_net_to_host_u32 (key->frag_id), key->proto);
1675   return s;
1676 }
1677
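/**
 * @brief Format a reassembly context, walking its buffer chain and printing
 * the byte range contributed by each fragment.
 */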
1678 static u8 *
1679 format_ip6_full_reass (u8 * s, va_list * args)
1680 {
1681   vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1682   ip6_full_reass_t *reass = va_arg (*args, ip6_full_reass_t *);
1683
1684   s = format (s, "ID: %lu, key: %U\n  first_bi: %u, data_len: %u, "
1685               "last_packet_octet: %u, trace_op_counter: %u\n",
1686               reass->id, format_ip6_full_reass_key, &reass->key,
1687               reass->first_bi, reass->data_len, reass->last_packet_octet,
1688               reass->trace_op_counter);
1689   u32 bi = reass->first_bi;
1690   u32 counter = 0;
1691   while (~0 != bi)
1692     {
1693       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
1694       vnet_buffer_opaque_t *vnb = vnet_buffer (b);
1695       s = format (s, "  #%03u: range: [%u, %u], bi: %u, off: %d, len: %u, "
1696                   "fragment[%u, %u]\n",
1697                   counter, vnb->ip.reass.range_first,
1698                   vnb->ip.reass.range_last, bi,
1699                   ip6_full_reass_buffer_get_data_offset (b),
1700                   ip6_full_reass_buffer_get_data_len (b),
1701                   vnb->ip.reass.fragment_first, vnb->ip.reass.fragment_last);
1702       if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
1703         {
1704           bi = b->next_buffer;
1705         }
1706       else
1707         {
1708           bi = ~0;
1709         }
1710     }
1711   return s;
1712 }
1713
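/**
 * @brief CLI handler for "show ip6-full-reassembly [details]": prints the
 * number of reassemblies in progress across all threads and the configured
 * limits; with "details" it also dumps every in-progress reassembly. The
 * "Buffers in use" counter is not currently populated and always prints 0.
 *
 * Example:
 *   vpp# show ip6-full-reassembly details
 */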
1714 static clib_error_t *
1715 show_ip6_full_reass (vlib_main_t * vm, unformat_input_t * input,
1716                      CLIB_UNUSED (vlib_cli_command_t * lmd))
1717 {
1718   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1719
1720   vlib_cli_output (vm, "---------------------");
1721   vlib_cli_output (vm, "IP6 reassembly status");
1722   vlib_cli_output (vm, "---------------------");
1723   bool details = false;
1724   if (unformat (input, "details"))
1725     {
1726       details = true;
1727     }
1728
1729   u32 sum_reass_n = 0;
1730   u64 sum_buffers_n = 0;
1731   ip6_full_reass_t *reass;
1732   uword thread_index;
1733   const uword nthreads = vlib_num_workers () + 1;
1734   for (thread_index = 0; thread_index < nthreads; ++thread_index)
1735     {
1736       ip6_full_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1737       clib_spinlock_lock (&rt->lock);
1738       if (details)
1739         {
1740           pool_foreach (reass, rt->pool) {
1741             vlib_cli_output (vm, "%U", format_ip6_full_reass, vm, reass);
1742           }
1743         }
1744       sum_reass_n += rt->reass_n;
1745       clib_spinlock_unlock (&rt->lock);
1746     }
1747   vlib_cli_output (vm, "---------------------");
1748   vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n",
1749                    (long unsigned) sum_reass_n);
1750   vlib_cli_output (vm,
1751                    "Maximum configured concurrent full IP6 reassemblies per worker-thread: %lu\n",
1752                    (long unsigned) rm->max_reass_n);
1753   vlib_cli_output (vm,
1754                    "Maximum configured amount of fragments "
1755                    "per full IP6 reassembly: %lu\n",
1756                    (long unsigned) rm->max_reass_len);
1757   vlib_cli_output (vm,
1758                    "Maximum configured full IP6 reassembly timeout: %lums\n",
1759                    (long unsigned) rm->timeout_ms);
1760   vlib_cli_output (vm,
1761                    "Maximum configured full IP6 reassembly expire walk interval: %lums\n",
1762                    (long unsigned) rm->expire_walk_interval_ms);
1763   vlib_cli_output (vm, "Buffers in use: %lu\n",
1764                    (long unsigned) sum_buffers_n);
1765   return 0;
1766 }
1767
1768 VLIB_CLI_COMMAND (show_ip6_full_reassembly_cmd, static) = {
1769     .path = "show ip6-full-reassembly",
1770     .short_help = "show ip6-full-reassembly [details]",
1771     .function = show_ip6_full_reass,
1772 };
1773
1774 #ifndef CLIB_MARCH_VARIANT
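/**
 * @brief Enable or disable the "ip6-full-reassembly-feature" feature on the
 * "ip6-unicast" arc of the given interface and return the result of
 * vnet_feature_enable_disable (), e.g.
 * ip6_full_reass_enable_disable (sw_if_index, 1) to enable.
 */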
1775 vnet_api_error_t
1776 ip6_full_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1777 {
1778   return vnet_feature_enable_disable ("ip6-unicast",
1779                                       "ip6-full-reassembly-feature",
1780                                       sw_if_index, enable_disable, 0, 0);
1781 }
1782 #endif /* CLIB_MARCH_VARIANT */
1783
1784 #define foreach_ip6_full_reassembly_handoff_error                       \
1785 _(CONGESTION_DROP, "congestion drop")
1786
1787
1788 typedef enum
1789 {
1790 #define _(sym,str) IP6_FULL_REASSEMBLY_HANDOFF_ERROR_##sym,
1791   foreach_ip6_full_reassembly_handoff_error
1792 #undef _
1793     IP6_FULL_REASSEMBLY_HANDOFF_N_ERROR,
1794 } ip6_full_reassembly_handoff_error_t;
1795
1796 static char *ip6_full_reassembly_handoff_error_strings[] = {
1797 #define _(sym,string) string,
1798   foreach_ip6_full_reassembly_handoff_error
1799 #undef _
1800 };
1801
1802 typedef struct
1803 {
1804   u32 next_worker_index;
1805 } ip6_full_reassembly_handoff_trace_t;
1806
1807 static u8 *
1808 format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args)
1809 {
1810   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1811   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1812   ip6_full_reassembly_handoff_trace_t *t =
1813     va_arg (*args, ip6_full_reassembly_handoff_trace_t *);
1814
1815   s =
1816     format (s, "ip6-full-reassembly-handoff: next-worker %d",
1817             t->next_worker_index);
1818
1819   return s;
1820 }
1821
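/*
 * Thread handoff for full reassembly: each buffer carries the index of the
 * thread which owns its reassembly context, so the frame is re-enqueued to
 * that thread via the frame queue matching the calling node variant
 * (feature, for-us/local or regular). Buffers which cannot be enqueued
 * because of congestion are counted and dropped.
 */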
1822 always_inline uword
1823 ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1824                                     vlib_frame_t *frame, bool is_feature,
1825                                     bool is_local)
1826 {
1827   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1828
1829   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1830   u32 n_enq, n_left_from, *from;
1831   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1832   u32 fq_index;
1833
1834   from = vlib_frame_vector_args (frame);
1835   n_left_from = frame->n_vectors;
1836   vlib_get_buffers (vm, from, bufs, n_left_from);
1837
1838   b = bufs;
1839   ti = thread_indices;
1840
1841   if (is_feature)
1842     {
1843       fq_index = rm->fq_feature_index;
1844     }
1845   else
1846     {
1847       if (is_local)
1848         {
1849           fq_index = rm->fq_local_index;
1850         }
1851       else
1852         {
1853           fq_index = rm->fq_index;
1854         }
1855     }
1856
1857   while (n_left_from > 0)
1858     {
1859       ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1860
1861       if (PREDICT_FALSE
1862           ((node->flags & VLIB_NODE_FLAG_TRACE)
1863            && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1864         {
1865           ip6_full_reassembly_handoff_trace_t *t =
1866             vlib_add_trace (vm, node, b[0], sizeof (*t));
1867           t->next_worker_index = ti[0];
1868         }
1869
1870       n_left_from -= 1;
1871       ti += 1;
1872       b += 1;
1873     }
1874   n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
1875                                          thread_indices, frame->n_vectors, 1);
1876
1877   if (n_enq < frame->n_vectors)
1878     vlib_node_increment_counter (vm, node->node_index,
1879                                  IP6_FULL_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
1880                                  frame->n_vectors - n_enq);
1881   return frame->n_vectors;
1882 }
1883
1884 VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm,
1885                                                  vlib_node_runtime_t * node,
1886                                                  vlib_frame_t * frame)
1887 {
1888   return ip6_full_reassembly_handoff_inline (
1889     vm, node, frame, false /* is_feature */, false /* is_local */);
1890 }
1891
1892 VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
1893   .name = "ip6-full-reassembly-handoff",
1894   .vector_size = sizeof (u32),
1895   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1896   .error_strings = ip6_full_reassembly_handoff_error_strings,
1897   .format_trace = format_ip6_full_reassembly_handoff_trace,
1898
1899   .n_next_nodes = 1,
1900
1901   .next_nodes = {
1902     [0] = "error-drop",
1903   },
1904 };
1905
1906 VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node)
1907 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1908 {
1909   return ip6_full_reassembly_handoff_inline (
1910     vm, node, frame, false /* is_feature */, true /* is_local */);
1911 }
1912
1913 VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = {
1914   .name = "ip6-local-full-reassembly-handoff",
1915   .vector_size = sizeof (u32),
1916   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1917   .error_strings = ip6_full_reassembly_handoff_error_strings,
1918   .format_trace = format_ip6_full_reassembly_handoff_trace,
1919
1920   .n_next_nodes = 1,
1921
1922   .next_nodes = {
1923     [0] = "error-drop",
1924   },
1925 };
1926
1927 VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm,
1928                                vlib_node_runtime_t * node, vlib_frame_t * frame)
1929 {
1930   return ip6_full_reassembly_handoff_inline (
1931     vm, node, frame, true /* is_feature */, false /* is_local */);
1932 }
1933
1934
1935 VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
1936   .name = "ip6-full-reass-feature-hoff",
1937   .vector_size = sizeof (u32),
1938   .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
1939   .error_strings = ip6_full_reassembly_handoff_error_strings,
1940   .format_trace = format_ip6_full_reassembly_handoff_trace,
1941
1942   .n_next_nodes = 1,
1943
1944   .next_nodes = {
1945     [0] = "error-drop",
1946   },
1947 };
1948
1949 #ifndef CLIB_MARCH_VARIANT
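/**
 * @brief Reference-counted per-interface enable/disable of the
 * "ip6-full-reassembly-feature" feature: the feature arc is only touched on
 * the first enable and on the disable which brings the count back to zero;
 * all other calls just adjust the counter and return -1.
 */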
1950 int
1951 ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1952 {
1953   ip6_full_reass_main_t *rm = &ip6_full_reass_main;
1954   vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
1955   if (is_enable)
1956     {
1957       if (!rm->feature_use_refcount_per_intf[sw_if_index])
1958         {
1959           ++rm->feature_use_refcount_per_intf[sw_if_index];
1960           return vnet_feature_enable_disable ("ip6-unicast",
1961                                               "ip6-full-reassembly-feature",
1962                                               sw_if_index, 1, 0, 0);
1963         }
1964       ++rm->feature_use_refcount_per_intf[sw_if_index];
1965     }
1966   else
1967     {
1968       --rm->feature_use_refcount_per_intf[sw_if_index];
1969       if (!rm->feature_use_refcount_per_intf[sw_if_index])
1970         return vnet_feature_enable_disable ("ip6-unicast",
1971                                             "ip6-full-reassembly-feature",
1972                                             sw_if_index, 0, 0, 0);
1973     }
1974   return -1;
1975 }
1976
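/**
 * @brief Enable or disable full reassembly of fragments destined to the box
 * itself ("for us" traffic) by (un)registering the local reassembly node as
 * the handler for the IPv6 fragmentation header.
 */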
1977 void
1978 ip6_local_full_reass_enable_disable (int enable)
1979 {
1980   if (enable)
1981     {
1982       if (!ip6_full_reass_main.is_local_reass_enabled)
1983         {
1984           ip6_full_reass_main.is_local_reass_enabled = 1;
1985           ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
1986                                  ip6_local_full_reass_node.index);
1987         }
1988     }
1989   else
1990     {
1991       if (ip6_full_reass_main.is_local_reass_enabled)
1992         {
1993           ip6_full_reass_main.is_local_reass_enabled = 0;
1994           ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION);
1995         }
1996     }
1997 }
1998
1999 int
2000 ip6_local_full_reass_enabled ()
2001 {
2002   return ip6_full_reass_main.is_local_reass_enabled;
2003 }
2004
2005 #endif
2006
2007 /*
2008  * fd.io coding-style-patch-verification: ON
2009  *
2010  * Local Variables:
2011  * eval: (c-set-style "gnu")
2012  * End:
2013  */