1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 /**
17  * @file
18  * @brief IPv6 Shallow Virtual Reassembly.
19  *
20  * This file contains the source code for IPv6 Shallow Virtual reassembly.
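 *
 * Shallow virtual reassembly never reassembles payloads. Fragments are
 * cached only until the first (offset 0) fragment of a packet is seen; the
 * L4 information (protocol, ports, TCP seq/ack numbers or ICMP type)
 * extracted from it is stamped into the buffer metadata of every fragment,
 * and the fragments are then forwarded individually so that downstream
 * nodes can classify them without full reassembly.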
21  */
22
23 #include <vppinfra/vec.h>
24 #include <vnet/vnet.h>
25 #include <vnet/ip/ip.h>
26 #include <vnet/ip/ip6_to_ip4.h>
27 #include <vppinfra/bihash_48_8.h>
28 #include <vnet/ip/reass/ip6_sv_reass.h>
29 #include <vnet/ip/ip6_inlines.h>
30
31 #define MSEC_PER_SEC 1000
32 #define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100
33 #define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000      // 10 seconds default
34 #define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
35 #define IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
36 #define IP6_SV_REASS_HT_LOAD_FACTOR (0.75)
37
38 typedef enum
39 {
40   IP6_SV_REASS_RC_OK,
41   IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS,
42   IP6_SV_REASS_RC_INTERNAL_ERROR,
43   IP6_SV_REASS_RC_UNSUPP_IP_PROTO,
44 } ip6_sv_reass_rc_t;
45
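/*
 * Hash table key: IPv6 source and destination, RX FIB index, fragment id
 * and IP protocol, padded so that the key fills the six u64 words of a
 * clib_bihash_kv_48_8_t key.
 */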
46 typedef struct
47 {
48   union
49   {
50     struct
51     {
52       ip6_address_t src;
53       ip6_address_t dst;
54       u32 fib_index;
55       u32 frag_id;
56       u8 unused[7];
57       u8 proto;
58     };
59     u64 as_u64[6];
60   };
61 } ip6_sv_reass_key_t;
62
63 typedef union
64 {
65   struct
66   {
67     u32 reass_index;
68     u32 thread_index;
69   };
70   u64 as_u64;
71 } ip6_sv_reass_val_t;
72
73 typedef union
74 {
75   struct
76   {
77     ip6_sv_reass_key_t k;
78     ip6_sv_reass_val_t v;
79   };
80   clib_bihash_kv_48_8_t kv;
81 } ip6_sv_reass_kv_t;
82
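/*
 * Per-flow reassembly context. Fragments are held in cached_buffers until
 * the first (offset 0) fragment supplies the L4 fields below; once
 * is_complete is set, cached and subsequently arriving fragments are
 * forwarded with this information attached.
 */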
83 typedef struct
84 {
85   // hash table key
86   ip6_sv_reass_key_t key;
87   // time when last packet was received
88   f64 last_heard;
89   // internal id of this reassembly
90   u64 id;
91   // trace operation counter
92   u32 trace_op_counter;
93   // buffer indexes of buffers in this reassembly in chronological order -
94   // including overlaps and duplicate fragments
95   u32 *cached_buffers;
96   // set to true when this reassembly is completed
97   bool is_complete;
98   // ip protocol
99   u8 ip_proto;
100   u8 icmp_type_or_tcp_flags;
101   u32 tcp_ack_number;
102   u32 tcp_seq_number;
103   // l4 src port
104   u16 l4_src_port;
105   // l4 dst port
106   u16 l4_dst_port;
107   // lru indexes
108   u32 lru_prev;
109   u32 lru_next;
110 } ip6_sv_reass_t;
111
112 typedef struct
113 {
114   ip6_sv_reass_t *pool;
115   u32 reass_n;
116   u32 id_counter;
117   clib_spinlock_t lock;
118   // lru indexes
119   u32 lru_first;
120   u32 lru_last;
121 } ip6_sv_reass_per_thread_t;
122
123 typedef struct
124 {
125   // IPv6 config
126   u32 timeout_ms;
127   f64 timeout;
128   u32 expire_walk_interval_ms;
129   // maximum number of fragments in one reassembly
130   u32 max_reass_len;
131   // maximum number of reassemblies
132   u32 max_reass_n;
133
134   // IPv6 runtime
135   clib_bihash_48_8_t hash;
136
137   // per-thread data
138   ip6_sv_reass_per_thread_t *per_thread_data;
139
140   // convenience
141   vlib_main_t *vlib_main;
142   vnet_main_t *vnet_main;
143
144   // node index of ip6-drop node
145   u32 ip6_drop_idx;
146   u32 ip6_icmp_error_idx;
147   u32 ip6_sv_reass_expire_node_idx;
148
149   /** Worker handoff */
150   u32 fq_index;
151   u32 fq_feature_index;
152
153   // reference count for enabling/disabling feature - per interface
154   u32 *feature_use_refcount_per_intf;
155 } ip6_sv_reass_main_t;
156
157 extern ip6_sv_reass_main_t ip6_sv_reass_main;
158
159 #ifndef CLIB_MARCH_VARIANT
160 ip6_sv_reass_main_t ip6_sv_reass_main;
161 #endif /* CLIB_MARCH_VARIANT */
162
163 typedef enum
164 {
165   IP6_SV_REASSEMBLY_NEXT_INPUT,
166   IP6_SV_REASSEMBLY_NEXT_DROP,
167   IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR,
168   IP6_SV_REASSEMBLY_NEXT_HANDOFF,
169   IP6_SV_REASSEMBLY_N_NEXT,
170 } ip6_sv_reass_next_t;
171
172 typedef enum
173 {
174   REASS_FRAGMENT_CACHE,
175   REASS_FINISH,
176   REASS_FRAGMENT_FORWARD,
177   REASS_PASSTHROUGH,
178 } ip6_sv_reass_trace_operation_e;
179
180 typedef struct
181 {
182   ip6_sv_reass_trace_operation_e action;
183   u32 reass_id;
184   u32 op_id;
185   u8 ip_proto;
186   u16 l4_src_port;
187   u16 l4_dst_port;
188 } ip6_sv_reass_trace_t;
189
190 static u8 *
191 format_ip6_sv_reass_trace (u8 * s, va_list * args)
192 {
193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
195   ip6_sv_reass_trace_t *t = va_arg (*args, ip6_sv_reass_trace_t *);
196   if (REASS_PASSTHROUGH != t->action)
197     {
198       s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
199     }
200   switch (t->action)
201     {
202     case REASS_FRAGMENT_CACHE:
203       s = format (s, "[cached]");
204       break;
205     case REASS_FINISH:
206       s =
207         format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
208                 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
209                 clib_net_to_host_u16 (t->l4_dst_port));
210       break;
211     case REASS_FRAGMENT_FORWARD:
212       s =
213         format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
214                 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
215                 clib_net_to_host_u16 (t->l4_dst_port));
216       break;
217     case REASS_PASSTHROUGH:
218       s = format (s, "[not fragmented or atomic fragment]");
219       break;
220     }
221   return s;
222 }
223
224 static void
225 ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
226                         ip6_sv_reass_t * reass, u32 bi,
227                         ip6_sv_reass_trace_operation_e action,
228                         u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
229 {
230   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
231   if (pool_is_free_index
232       (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
233     {
234       // this buffer's trace is gone
235       b->flags &= ~VLIB_BUFFER_IS_TRACED;
236       return;
237     }
238   ip6_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
239   if (reass)
240     {
241       t->reass_id = reass->id;
242       t->op_id = reass->trace_op_counter;
243       ++reass->trace_op_counter;
244     }
245   t->action = action;
246   t->ip_proto = ip_proto;
247   t->l4_src_port = l4_src_port;
248   t->l4_dst_port = l4_dst_port;
249 #if 0
250   static u8 *s = NULL;
251   s = format (s, "%U", format_ip6_sv_reass_trace, NULL, NULL, t);
252   printf ("%.*s\n", vec_len (s), s);
253   fflush (stdout);
254   vec_reset_length (s);
255 #endif
256 }
257
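/*
 * Tear down a reassembly: remove its key from the hash table, free any
 * cached buffers, unlink it from the per-thread LRU list and return the
 * element to the pool.
 */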
258 always_inline void
259 ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm,
260                    ip6_sv_reass_per_thread_t * rt, ip6_sv_reass_t * reass)
261 {
262   clib_bihash_kv_48_8_t kv;
263   kv.key[0] = reass->key.as_u64[0];
264   kv.key[1] = reass->key.as_u64[1];
265   kv.key[2] = reass->key.as_u64[2];
266   kv.key[3] = reass->key.as_u64[3];
267   kv.key[4] = reass->key.as_u64[4];
268   kv.key[5] = reass->key.as_u64[5];
269   clib_bihash_add_del_48_8 (&rm->hash, &kv, 0);
270   vlib_buffer_free (vm, reass->cached_buffers,
271                     vec_len (reass->cached_buffers));
272   vec_free (reass->cached_buffers);
273   reass->cached_buffers = NULL;
274   if (~0 != reass->lru_prev)
275     {
276       ip6_sv_reass_t *lru_prev =
277         pool_elt_at_index (rt->pool, reass->lru_prev);
278       lru_prev->lru_next = reass->lru_next;
279     }
280   if (~0 != reass->lru_next)
281     {
282       ip6_sv_reass_t *lru_next =
283         pool_elt_at_index (rt->pool, reass->lru_next);
284       lru_next->lru_prev = reass->lru_prev;
285     }
286   if (rt->lru_first == reass - rt->pool)
287     {
288       rt->lru_first = reass->lru_next;
289     }
290   if (rt->lru_last == reass - rt->pool)
291     {
292       rt->lru_last = reass->lru_prev;
293     }
294   pool_put (rt->pool, reass);
295   --rt->reass_n;
296 }
297
298 always_inline void
299 ip6_sv_reass_init (ip6_sv_reass_t * reass)
300 {
301   reass->cached_buffers = NULL;
302   reass->is_complete = false;
303 }
304
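/*
 * Look up the flow key in the hash table. If the entry is owned by another
 * worker, *do_handoff is set and NULL is returned. Expired entries are
 * freed and recreated. When the per-thread pool is at max_reass_n, the
 * least recently used reassembly is evicted before a new one is allocated
 * and appended to the LRU list.
 */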
305 always_inline ip6_sv_reass_t *
306 ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
307                              ip6_sv_reass_per_thread_t *rt,
308                              ip6_sv_reass_kv_t *kv, u8 *do_handoff)
309 {
310   ip6_sv_reass_t *reass = NULL;
311   f64 now = vlib_time_now (vm);
312
313   if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
314     {
315       if (vm->thread_index != kv->v.thread_index)
316         {
317           *do_handoff = 1;
318           return NULL;
319         }
320       reass = pool_elt_at_index (rt->pool, kv->v.reass_index);
321
322       if (now > reass->last_heard + rm->timeout)
323         {
324           ip6_sv_reass_free (vm, rm, rt, reass);
325           reass = NULL;
326         }
327     }
328
329   if (reass)
330     {
331       reass->last_heard = now;
332       return reass;
333     }
334
335   if (rt->reass_n >= rm->max_reass_n)
336     {
337       reass = pool_elt_at_index (rt->pool, rt->lru_first);
338       ip6_sv_reass_free (vm, rm, rt, reass);
339     }
340
341   pool_get (rt->pool, reass);
342   clib_memset (reass, 0, sizeof (*reass));
343   reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
344   ++rt->id_counter;
345   ip6_sv_reass_init (reass);
346   ++rt->reass_n;
347
348   reass->lru_prev = reass->lru_next = ~0;
349
350   if (~0 != rt->lru_last)
351     {
352       ip6_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
353       reass->lru_prev = rt->lru_last;
354       lru_last->lru_next = rt->lru_last = reass - rt->pool;
355     }
356
357   if (~0 == rt->lru_first)
358     {
359       rt->lru_first = rt->lru_last = reass - rt->pool;
360     }
361
362   reass->key.as_u64[0] = kv->kv.key[0];
363   reass->key.as_u64[1] = kv->kv.key[1];
364   reass->key.as_u64[2] = kv->kv.key[2];
365   reass->key.as_u64[3] = kv->kv.key[3];
366   reass->key.as_u64[4] = kv->kv.key[4];
367   reass->key.as_u64[5] = kv->kv.key[5];
368   kv->v.reass_index = (reass - rt->pool);
369   kv->v.thread_index = vm->thread_index;
370   reass->last_heard = now;
371
372   if (clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 1))
373     {
374       ip6_sv_reass_free (vm, rm, rt, reass);
375       reass = NULL;
376     }
377
378   return reass;
379 }
380
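/*
 * Record the fragment's offsets in its buffer metadata and cache the
 * buffer. For the first (offset 0) fragment the L4 information is
 * extracted via ip6_get_port() and the reassembly is marked complete.
 * Returns an error code for inconsistent headers, unsupported protocols or
 * when the number of cached fragments exceeds max_reass_len.
 */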
381 always_inline ip6_sv_reass_rc_t
382 ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
383                      ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass, u32 bi0,
384                      ip6_frag_hdr_t *frag_hdr)
385 {
386   vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
387   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
388   fvnb->ip.reass.ip6_frag_hdr_offset =
389     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
390   ip6_header_t *fip = vlib_buffer_get_current (fb);
391   if (fb->current_length < sizeof (*fip) ||
392       fvnb->ip.reass.ip6_frag_hdr_offset == 0 ||
393       fvnb->ip.reass.ip6_frag_hdr_offset >= fb->current_length)
394     {
395       return IP6_SV_REASS_RC_INTERNAL_ERROR;
396     }
397
398   u32 fragment_first = fvnb->ip.reass.fragment_first =
399     ip6_frag_hdr_offset_bytes (frag_hdr);
400   u32 fragment_length =
401     vlib_buffer_length_in_chain (vm, fb) -
402     (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
403   u32 fragment_last = fvnb->ip.reass.fragment_last =
404     fragment_first + fragment_length - 1;
405   fvnb->ip.reass.range_first = fragment_first;
406   fvnb->ip.reass.range_last = fragment_last;
407   fvnb->ip.reass.next_range_bi = ~0;
408   if (0 == fragment_first)
409     {
410       if (!ip6_get_port
411           (vm, fb, fip, fb->current_length, &reass->ip_proto,
412            &reass->l4_src_port, &reass->l4_dst_port,
413            &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number,
414            &reass->tcp_seq_number))
415         return IP6_SV_REASS_RC_UNSUPP_IP_PROTO;
416
417       reass->is_complete = true;
418       vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
419       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
420         {
421           ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH,
422                                   reass->ip_proto, reass->l4_src_port,
423                                   reass->l4_dst_port);
424         }
425     }
426   vec_add1 (reass->cached_buffers, bi0);
427   if (!reass->is_complete)
428     {
429       if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
430         {
431           ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FRAGMENT_CACHE,
432                                   reass->ip_proto, reass->l4_src_port,
433                                   reass->l4_dst_port);
434         }
435       if (vec_len (reass->cached_buffers) > rm->max_reass_len)
436         {
437           return IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS;
438         }
439     }
440   return IP6_SV_REASS_RC_OK;
441 }
442
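/*
 * Per RFC 7112 the first fragment must contain the complete IPv6 header
 * chain up to an upper-layer header; if it does not, an ICMPv6 parameter
 * problem is prepared and the packet is counted as an error.
 */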
443 always_inline bool
444 ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
445                                          vlib_buffer_t *b,
446                                          ip6_ext_hdr_chain_t *hc)
447 {
448   int nh = hc->eh[hc->length - 1].protocol;
449   /* if the last header in the chain is still an extension header, no upper-layer header is present */
450   if (ip6_ext_hdr (nh))
451     {
452       icmp6_error_set_vnet_buffer (
453         b, ICMP6_parameter_problem,
454         ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
455       b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
456       return false;
457     }
458   return true;
459 }
460
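/*
 * Non-final fragments must carry a payload that is a multiple of 8 octets
 * (RFC 8200, section 4.5); otherwise an ICMPv6 parameter problem pointing
 * at the payload length field is prepared.
 */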
461 always_inline bool
462 ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
463                                          vlib_buffer_t * b,
464                                          ip6_frag_hdr_t * frag_hdr)
465 {
466   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
467   ip6_header_t *ip = vlib_buffer_get_current (b);
468   int more_fragments = ip6_frag_hdr_more (frag_hdr);
469   u32 fragment_length =
470     vlib_buffer_length_in_chain (vm, b) -
471     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
472   if (more_fragments && 0 != fragment_length % 8)
473     {
474       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
475                                    ICMP6_parameter_problem_erroneous_header_field,
476                                    (u8 *) & ip->payload_length - (u8 *) ip);
477       return false;
478     }
479   return true;
480 }
481
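/*
 * Fragment offset plus fragment length must not exceed 65535 octets
 * (RFC 8200, section 4.5); otherwise an ICMPv6 parameter problem pointing
 * at the fragment offset field is prepared.
 */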
482 always_inline bool
483 ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
484                                         vlib_buffer_t * b,
485                                         ip6_frag_hdr_t * frag_hdr)
486 {
487   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
488   u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
489   u32 fragment_length =
490     vlib_buffer_length_in_chain (vm, b) -
491     (vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
492   if (fragment_first + fragment_length > 65535)
493     {
494       ip6_header_t *ip0 = vlib_buffer_get_current (b);
495       icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
496                                    ICMP6_parameter_problem_erroneous_header_field,
497                                    (u8 *) & frag_hdr->fragment_offset_and_more
498                                    - (u8 *) ip0);
499       return false;
500     }
501   return true;
502 }
503
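/*
 * Main per-packet loop, shared by the standalone node and the ip6-unicast
 * feature node (is_feature). Unfragmented packets and atomic fragments are
 * passed through with their L4 metadata filled in. Other fragments are
 * matched to a reassembly: if it is already complete the fragment is
 * forwarded immediately with the stored L4 information, otherwise it is
 * cached, and once the first fragment completes the reassembly all cached
 * buffers are flushed downstream. Fragments whose flow is owned by another
 * worker are handed off to that worker.
 */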
504 always_inline uword
505 ip6_sv_reassembly_inline (vlib_main_t * vm,
506                           vlib_node_runtime_t * node,
507                           vlib_frame_t * frame, bool is_feature)
508 {
509   u32 *from = vlib_frame_vector_args (frame);
510   u32 n_left_from, n_left_to_next, *to_next, next_index;
511   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
512   ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
513   clib_spinlock_lock (&rt->lock);
514
515   n_left_from = frame->n_vectors;
516   next_index = node->cached_next_index;
517
518   while (n_left_from > 0)
519     {
520       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
521
522       while (n_left_from > 0 && n_left_to_next > 0)
523         {
524           u32 bi0;
525           vlib_buffer_t *b0;
526           u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
527           u32 error0 = IP6_ERROR_NONE;
528
529           bi0 = from[0];
530           b0 = vlib_get_buffer (vm, bi0);
531
532           ip6_header_t *ip0 = vlib_buffer_get_current (b0);
533           ip6_frag_hdr_t *frag_hdr;
534           ip6_ext_hdr_chain_t hdr_chain;
535           bool is_atomic_fragment = false;
536
537           int res = ip6_ext_header_walk (
538             b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
539           if (res >= 0 &&
540               hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
541             {
542               frag_hdr =
543                 ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
544               is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
545                                     !ip6_frag_hdr_more (frag_hdr));
546             }
547
548           if (res < 0 ||
549               hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
550               is_atomic_fragment)
551             {
552               // this is a regular unfragmented packet or an atomic fragment
553               if (!ip6_get_port
554                   (vm, b0, ip0, b0->current_length,
555                    &(vnet_buffer (b0)->ip.reass.ip_proto),
556                    &(vnet_buffer (b0)->ip.reass.l4_src_port),
557                    &(vnet_buffer (b0)->ip.reass.l4_dst_port),
558                    &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags),
559                    &(vnet_buffer (b0)->ip.reass.tcp_ack_number),
560                    &(vnet_buffer (b0)->ip.reass.tcp_seq_number)))
561                 {
562                   error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
563                   b0->error = node->errors[error0];
564                   next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
565                   goto packet_enqueue;
566                 }
567               vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
568               next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
569               if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
570                 {
571                   ip6_sv_reass_add_trace (
572                     vm, node, NULL, bi0, REASS_PASSTHROUGH,
573                     vnet_buffer (b0)->ip.reass.ip_proto,
574                     vnet_buffer (b0)->ip.reass.l4_src_port,
575                     vnet_buffer (b0)->ip.reass.l4_dst_port);
576                 }
577               goto packet_enqueue;
578             }
579
580           vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
581             hdr_chain.eh[res].offset;
582
583           if (0 == ip6_frag_hdr_offset (frag_hdr))
584             {
585               // first fragment - verify upper-layer is present
586               if (!ip6_sv_reass_verify_upper_layer_present (node, b0,
587                                                             &hdr_chain))
588                 {
589                   next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
590                   goto packet_enqueue;
591                 }
592             }
593           if (!ip6_sv_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
594               !ip6_sv_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
595             {
596               next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
597               goto packet_enqueue;
598             }
599
600           ip6_sv_reass_kv_t kv;
601           u8 do_handoff = 0;
602
603           kv.k.as_u64[0] = ip0->src_address.as_u64[0];
604           kv.k.as_u64[1] = ip0->src_address.as_u64[1];
605           kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
606           kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
607           kv.k.as_u64[4] =
608             ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
609                             vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
610             (u64) frag_hdr->identification;
611           kv.k.as_u64[5] = ip0->protocol;
612
613           ip6_sv_reass_t *reass =
614             ip6_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
615
616           if (PREDICT_FALSE (do_handoff))
617             {
618               next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
619               vnet_buffer (b0)->ip.reass.owner_thread_index =
620                 kv.v.thread_index;
621               goto packet_enqueue;
622             }
623
624           if (!reass)
625             {
626               next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
627               error0 = IP6_ERROR_REASS_LIMIT_REACHED;
628               b0->error = node->errors[error0];
629               goto packet_enqueue;
630             }
631
632           if (reass->is_complete)
633             {
634               vnet_buffer (b0)->ip.reass.is_non_first_fragment =
635                 ! !ip6_frag_hdr_offset (frag_hdr);
636               vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
637               vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
638                 reass->icmp_type_or_tcp_flags;
639               vnet_buffer (b0)->ip.reass.tcp_ack_number =
640                 reass->tcp_ack_number;
641               vnet_buffer (b0)->ip.reass.tcp_seq_number =
642                 reass->tcp_seq_number;
643               vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
644               vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
645               next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
646               if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
647                 {
648                   ip6_sv_reass_add_trace (
649                     vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
650                     reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
651                 }
652               goto packet_enqueue;
653             }
654
655           u32 counter = ~0;
656           switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr))
657             {
658             case IP6_SV_REASS_RC_OK:
659               /* nothing to do here */
660               break;
661             case IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS:
662               counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
663               break;
664             case IP6_SV_REASS_RC_UNSUPP_IP_PROTO:
665               counter = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
666               break;
667             case IP6_SV_REASS_RC_INTERNAL_ERROR:
668               counter = IP6_ERROR_REASS_INTERNAL_ERROR;
669               break;
670             }
671           if (~0 != counter)
672             {
673               vlib_node_increment_counter (vm, node->node_index, counter, 1);
674               ip6_sv_reass_free (vm, rm, rt, reass);
675               goto next_packet;
676             }
677
678           if (reass->is_complete)
679             {
680               u32 idx;
681               vec_foreach_index (idx, reass->cached_buffers)
682               {
683                 u32 bi0 = vec_elt (reass->cached_buffers, idx);
684                 if (0 == n_left_to_next)
685                   {
686                     vlib_put_next_frame (vm, node, next_index,
687                                          n_left_to_next);
688                     vlib_get_next_frame (vm, node, next_index, to_next,
689                                          n_left_to_next);
690                   }
691                 to_next[0] = bi0;
692                 to_next += 1;
693                 n_left_to_next -= 1;
694                 b0 = vlib_get_buffer (vm, bi0);
695                 if (is_feature)
696                   {
697                     vnet_feature_next (&next0, b0);
698                   }
699                 frag_hdr =
700                   vlib_buffer_get_current (b0) +
701                   vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset;
702                 vnet_buffer (b0)->ip.reass.is_non_first_fragment =
703                   ! !ip6_frag_hdr_offset (frag_hdr);
704                 vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
705                 vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
706                   reass->icmp_type_or_tcp_flags;
707                 vnet_buffer (b0)->ip.reass.tcp_ack_number =
708                   reass->tcp_ack_number;
709                 vnet_buffer (b0)->ip.reass.tcp_seq_number =
710                   reass->tcp_seq_number;
711                 vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
712                 vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
713                 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
714                   {
715                     ip6_sv_reass_add_trace (
716                       vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
717                       reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
718                   }
719                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
720                                                  to_next, n_left_to_next, bi0,
721                                                  next0);
722               }
723               _vec_len (reass->cached_buffers) = 0;     // buffers are owned by frame now
724             }
725           goto next_packet;
726
727         packet_enqueue:
728           to_next[0] = bi0;
729           to_next += 1;
730           n_left_to_next -= 1;
731           if (is_feature && IP6_ERROR_NONE == error0)
732             {
733               b0 = vlib_get_buffer (vm, bi0);
734               vnet_feature_next (&next0, b0);
735             }
736           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
737                                            n_left_to_next, bi0, next0);
738
739         next_packet:
740           from += 1;
741           n_left_from -= 1;
742         }
743
744       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
745     }
746
747   clib_spinlock_unlock (&rt->lock);
748   return frame->n_vectors;
749 }
750
751 static char *ip6_sv_reassembly_error_strings[] = {
752 #define _(sym, string) string,
753   foreach_ip6_error
754 #undef _
755 };
756
757 VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
758                                   vlib_node_runtime_t * node,
759                                   vlib_frame_t * frame)
760 {
761   return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ );
762 }
763
764 /* *INDENT-OFF* */
765 VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
766     .name = "ip6-sv-reassembly",
767     .vector_size = sizeof (u32),
768     .format_trace = format_ip6_sv_reass_trace,
769     .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
770     .error_strings = ip6_sv_reassembly_error_strings,
771     .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
772     .next_nodes =
773         {
774                 [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
775                 [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
776                 [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
777                 [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff",
778         },
779 };
780 /* *INDENT-ON* */
781
782 VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
783                                           vlib_node_runtime_t * node,
784                                           vlib_frame_t * frame)
785 {
786   return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ );
787 }
788
789 /* *INDENT-OFF* */
790 VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
791     .name = "ip6-sv-reassembly-feature",
792     .vector_size = sizeof (u32),
793     .format_trace = format_ip6_sv_reass_trace,
794     .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
795     .error_strings = ip6_sv_reassembly_error_strings,
796     .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
797     .next_nodes =
798         {
799                 [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
800                 [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
801                 [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
802                 [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff",
803         },
804 };
805 /* *INDENT-ON* */
806
807 /* *INDENT-OFF* */
808 VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
809     .arc_name = "ip6-unicast",
810     .node_name = "ip6-sv-reassembly-feature",
811     .runs_before = VNET_FEATURES ("ip6-lookup"),
812     .runs_after = 0,
813 };
814 /* *INDENT-ON* */
815
816 #ifndef CLIB_MARCH_VARIANT
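/*
 * Number of hash buckets: the next power of two of max_reass_n divided by
 * the configured load factor.
 */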
817 static u32
818 ip6_sv_reass_get_nbuckets ()
819 {
820   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
821   u32 nbuckets;
822   u8 i;
823
824   nbuckets = (u32) (rm->max_reass_n / IP6_SV_REASS_HT_LOAD_FACTOR);
825
826   for (i = 0; i < 31; i++)
827     if ((1 << i) >= nbuckets)
828       break;
829   nbuckets = 1 << i;
830
831   return nbuckets;
832 }
833 #endif /* CLIB_MARCH_VARIANT */
834
835 typedef enum
836 {
837   IP6_EVENT_CONFIG_CHANGED = 1,
838 } ip6_sv_reass_event_t;
839
840 #ifndef CLIB_MARCH_VARIANT
841 typedef struct
842 {
843   int failure;
844   clib_bihash_48_8_t *new_hash;
845 } ip6_rehash_cb_ctx;
846
847 static int
848 ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx)
849 {
850   ip6_rehash_cb_ctx *ctx = _ctx;
851   if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1))
852     {
853       ctx->failure = 1;
854     }
855   return (BIHASH_WALK_CONTINUE);
856 }
857
858 static void
859 ip6_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
860                          u32 max_reassembly_length,
861                          u32 expire_walk_interval_ms)
862 {
863   ip6_sv_reass_main.timeout_ms = timeout_ms;
864   ip6_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
865   ip6_sv_reass_main.max_reass_n = max_reassemblies;
866   ip6_sv_reass_main.max_reass_len = max_reassembly_length;
867   ip6_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
868 }
869
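/*
 * Apply new reassembly parameters. The expire-walk process is signalled so
 * that it picks up a changed interval, and if the new maximum requires more
 * hash buckets the bihash is rebuilt and existing entries rehashed into it.
 */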
870 vnet_api_error_t
871 ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
872                   u32 max_reassembly_length, u32 expire_walk_interval_ms)
873 {
874   u32 old_nbuckets = ip6_sv_reass_get_nbuckets ();
875   ip6_sv_reass_set_params (timeout_ms, max_reassemblies,
876                            max_reassembly_length, expire_walk_interval_ms);
877   vlib_process_signal_event (ip6_sv_reass_main.vlib_main,
878                              ip6_sv_reass_main.ip6_sv_reass_expire_node_idx,
879                              IP6_EVENT_CONFIG_CHANGED, 0);
880   u32 new_nbuckets = ip6_sv_reass_get_nbuckets ();
881   if (ip6_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
882     {
883       clib_bihash_48_8_t new_hash;
884       clib_memset (&new_hash, 0, sizeof (new_hash));
885       ip6_rehash_cb_ctx ctx;
886       ctx.failure = 0;
887       ctx.new_hash = &new_hash;
888       clib_bihash_init_48_8 (&new_hash, "ip6-sv-reass", new_nbuckets,
889                              new_nbuckets * 1024);
890       clib_bihash_foreach_key_value_pair_48_8 (&ip6_sv_reass_main.hash,
891                                                ip6_rehash_cb, &ctx);
892       if (ctx.failure)
893         {
894           clib_bihash_free_48_8 (&new_hash);
895           return -1;
896         }
897       else
898         {
899           clib_bihash_free_48_8 (&ip6_sv_reass_main.hash);
900           clib_memcpy_fast (&ip6_sv_reass_main.hash, &new_hash,
901                             sizeof (ip6_sv_reass_main.hash));
902           clib_bihash_copied (&ip6_sv_reass_main.hash, &new_hash);
903         }
904     }
905   return 0;
906 }
907
908 vnet_api_error_t
909 ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
910                   u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
911 {
912   *timeout_ms = ip6_sv_reass_main.timeout_ms;
913   *max_reassemblies = ip6_sv_reass_main.max_reass_n;
914   *max_reassembly_length = ip6_sv_reass_main.max_reass_len;
915   *expire_walk_interval_ms = ip6_sv_reass_main.expire_walk_interval_ms;
916   return 0;
917 }
918
919 static clib_error_t *
920 ip6_sv_reass_init_function (vlib_main_t * vm)
921 {
922   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
923   clib_error_t *error = 0;
924   u32 nbuckets;
925   vlib_node_t *node;
926
927   rm->vlib_main = vm;
928   rm->vnet_main = vnet_get_main ();
929
930   vec_validate (rm->per_thread_data, vlib_num_workers ());
931   ip6_sv_reass_per_thread_t *rt;
932   vec_foreach (rt, rm->per_thread_data)
933   {
934     clib_spinlock_init (&rt->lock);
935     pool_alloc (rt->pool, rm->max_reass_n);
936     rt->lru_first = rt->lru_last = ~0;
937   }
938
939   node = vlib_get_node_by_name (vm, (u8 *) "ip6-sv-reassembly-expire-walk");
940   ASSERT (node);
941   rm->ip6_sv_reass_expire_node_idx = node->index;
942
943   ip6_sv_reass_set_params (IP6_SV_REASS_TIMEOUT_DEFAULT_MS,
944                            IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
945                            IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
946                            IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
947
948   nbuckets = ip6_sv_reass_get_nbuckets ();
949   clib_bihash_init_48_8 (&rm->hash, "ip6-sv-reass", nbuckets,
950                          nbuckets * 1024);
951
952   node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
953   ASSERT (node);
954   rm->ip6_drop_idx = node->index;
955   node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
956   ASSERT (node);
957   rm->ip6_icmp_error_idx = node->index;
958
959   if ((error = vlib_call_init_function (vm, ip_main_init)))
960     return error;
961
962   rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
963   rm->fq_feature_index =
964     vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
965
966   rm->feature_use_refcount_per_intf = NULL;
967
968   return error;
969 }
970
971 VLIB_INIT_FUNCTION (ip6_sv_reass_init_function);
972 #endif /* CLIB_MARCH_VARIANT */
973
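/*
 * Process node: wakes up every expire_walk_interval_ms (or on a config
 * change event) and frees, per thread, all reassemblies that have not seen
 * a fragment for longer than the configured timeout.
 */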
974 static uword
975 ip6_sv_reass_walk_expired (vlib_main_t *vm,
976                            CLIB_UNUSED (vlib_node_runtime_t *node),
977                            CLIB_UNUSED (vlib_frame_t *f))
978 {
979   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
980   uword event_type, *event_data = 0;
981
982   while (true)
983     {
984       vlib_process_wait_for_event_or_clock (vm,
985                                             (f64) rm->expire_walk_interval_ms
986                                             / (f64) MSEC_PER_SEC);
987       event_type = vlib_process_get_events (vm, &event_data);
988
989       switch (event_type)
990         {
991         case ~0:
992           /* no events => timeout */
993           /* fallthrough */
994         case IP6_EVENT_CONFIG_CHANGED:
995           /* nothing to do here */
996           break;
997         default:
998           clib_warning ("BUG: event type 0x%wx", event_type);
999           break;
1000         }
1001       f64 now = vlib_time_now (vm);
1002
1003       ip6_sv_reass_t *reass;
1004       int *pool_indexes_to_free = NULL;
1005
1006       uword thread_index = 0;
1007       int index;
1008       const uword nthreads = vlib_num_workers () + 1;
1009       for (thread_index = 0; thread_index < nthreads; ++thread_index)
1010         {
1011           ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1012           clib_spinlock_lock (&rt->lock);
1013
1014           vec_reset_length (pool_indexes_to_free);
1015           /* *INDENT-OFF* */
1016           pool_foreach_index (index, rt->pool)  {
1017                                 reass = pool_elt_at_index (rt->pool, index);
1018                                 if (now > reass->last_heard + rm->timeout)
1019                                   {
1020                                     vec_add1 (pool_indexes_to_free, index);
1021                                   }
1022                               }
1023           /* *INDENT-ON* */
1024           int *i;
1025           /* *INDENT-OFF* */
1026           vec_foreach (i, pool_indexes_to_free)
1027           {
1028             ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
1029             ip6_sv_reass_free (vm, rm, rt, reass);
1030           }
1031           /* *INDENT-ON* */
1032
1033           clib_spinlock_unlock (&rt->lock);
1034         }
1035
1036       vec_free (pool_indexes_to_free);
1037       if (event_data)
1038         {
1039           _vec_len (event_data) = 0;
1040         }
1041     }
1042
1043   return 0;
1044 }
1045
1046 /* *INDENT-OFF* */
1047 VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
1048     .function = ip6_sv_reass_walk_expired,
1049     .format_trace = format_ip6_sv_reass_trace,
1050     .type = VLIB_NODE_TYPE_PROCESS,
1051     .name = "ip6-sv-reassembly-expire-walk",
1052
1053     .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
1054     .error_strings = ip6_sv_reassembly_error_strings,
1055
1056 };
1057 /* *INDENT-ON* */
1058
1059 static u8 *
1060 format_ip6_sv_reass_key (u8 * s, va_list * args)
1061 {
1062   ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *);
1063   s =
1064     format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
1065             key->fib_index, format_ip6_address, &key->src, format_ip6_address,
1066             &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
1067   return s;
1068 }
1069
1070 static u8 *
1071 format_ip6_sv_reass (u8 * s, va_list * args)
1072 {
1073   vlib_main_t *vm = va_arg (*args, vlib_main_t *);
1074   ip6_sv_reass_t *reass = va_arg (*args, ip6_sv_reass_t *);
1075
1076   s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n",
1077               reass->id, format_ip6_sv_reass_key, &reass->key,
1078               reass->trace_op_counter);
1079   vlib_buffer_t *b;
1080   u32 *bip;
1081   u32 counter = 0;
1082   vec_foreach (bip, reass->cached_buffers)
1083   {
1084     u32 bi = *bip;
1085     do
1086       {
1087         b = vlib_get_buffer (vm, bi);
1088         s = format (s, "  #%03u: bi: %u\n", counter, bi);
1089         ++counter;
1090         bi = b->next_buffer;
1091       }
1092     while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
1093   }
1094   return s;
1095 }
1096
1097 static clib_error_t *
1098 show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
1099                    CLIB_UNUSED (vlib_cli_command_t * lmd))
1100 {
1101   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
1102
1103   vlib_cli_output (vm, "---------------------");
1104   vlib_cli_output (vm, "IP6 reassembly status");
1105   vlib_cli_output (vm, "---------------------");
1106   bool details = false;
1107   if (unformat (input, "details"))
1108     {
1109       details = true;
1110     }
1111
1112   u32 sum_reass_n = 0;
1113   u64 sum_buffers_n = 0;
1114   ip6_sv_reass_t *reass;
1115   uword thread_index;
1116   const uword nthreads = vlib_num_workers () + 1;
1117   for (thread_index = 0; thread_index < nthreads; ++thread_index)
1118     {
1119       ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
1120       clib_spinlock_lock (&rt->lock);
1121       if (details)
1122         {
1123           /* *INDENT-OFF* */
1124           pool_foreach (reass, rt->pool) {
1125             vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
1126           }
1127           /* *INDENT-ON* */
1128         }
1129       sum_reass_n += rt->reass_n;
1130       clib_spinlock_unlock (&rt->lock);
1131     }
1132   vlib_cli_output (vm, "---------------------");
1133   vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n",
1134                    (long unsigned) sum_reass_n);
1135   vlib_cli_output (vm,
1136                    "Maximum configured concurrent shallow virtual IP6 reassemblies per worker-thread: %lu\n",
1137                    (long unsigned) rm->max_reass_n);
1138   vlib_cli_output (vm,
1139                    "Maximum configured amount of fragments per shallow "
1140                    "virtual IP6 reassembly: %lu\n",
1141                    (long unsigned) rm->max_reass_len);
1142   vlib_cli_output (vm,
1143                    "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n",
1144                    (long unsigned) rm->timeout_ms);
1145   vlib_cli_output (vm,
1146                    "Maximum configured shallow virtual IP6 reassembly expire walk interval: %lums\n",
1147                    (long unsigned) rm->expire_walk_interval_ms);
1148   vlib_cli_output (vm, "Buffers in use: %lu\n",
1149                    (long unsigned) sum_buffers_n);
1150   return 0;
1151 }
1152
1153 /* *INDENT-OFF* */
1154 VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
1155     .path = "show ip6-sv-reassembly",
1156     .short_help = "show ip6-sv-reassembly [details]",
1157     .function = show_ip6_sv_reass,
1158 };
1159 /* *INDENT-ON* */
1160
1161 #ifndef CLIB_MARCH_VARIANT
1162 vnet_api_error_t
1163 ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
1164 {
1165   return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index,
1166                                                   enable_disable);
1167 }
1168 #endif /* CLIB_MARCH_VARIANT */
1169
1170 #define foreach_ip6_sv_reassembly_handoff_error                       \
1171 _(CONGESTION_DROP, "congestion drop")
1172
1173
1174 typedef enum
1175 {
1176 #define _(sym,str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
1177   foreach_ip6_sv_reassembly_handoff_error
1178 #undef _
1179     IP6_SV_REASSEMBLY_HANDOFF_N_ERROR,
1180 } ip6_sv_reassembly_handoff_error_t;
1181
1182 static char *ip6_sv_reassembly_handoff_error_strings[] = {
1183 #define _(sym,string) string,
1184   foreach_ip6_sv_reassembly_handoff_error
1185 #undef _
1186 };
1187
1188 typedef struct
1189 {
1190   u32 next_worker_index;
1191 } ip6_sv_reassembly_handoff_trace_t;
1192
1193 static u8 *
1194 format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
1195 {
1196   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1197   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1198   ip6_sv_reassembly_handoff_trace_t *t =
1199     va_arg (*args, ip6_sv_reassembly_handoff_trace_t *);
1200
1201   s =
1202     format (s, "ip6-sv-reassembly-handoff: next-worker %d",
1203             t->next_worker_index);
1204
1205   return s;
1206 }
1207
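/*
 * Hand fragments off to the worker that owns their reassembly, using the
 * frame queue selected by is_feature. Buffers that cannot be enqueued are
 * counted as congestion drops.
 */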
1208 always_inline uword
1209 ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
1210                                   vlib_node_runtime_t * node,
1211                                   vlib_frame_t * frame, bool is_feature)
1212 {
1213   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
1214
1215   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1216   u32 n_enq, n_left_from, *from;
1217   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1218   u32 fq_index;
1219
1220   from = vlib_frame_vector_args (frame);
1221   n_left_from = frame->n_vectors;
1222   vlib_get_buffers (vm, from, bufs, n_left_from);
1223
1224   b = bufs;
1225   ti = thread_indices;
1226
1227   fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
1228
1229   while (n_left_from > 0)
1230     {
1231       ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
1232
1233       if (PREDICT_FALSE
1234           ((node->flags & VLIB_NODE_FLAG_TRACE)
1235            && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1236         {
1237           ip6_sv_reassembly_handoff_trace_t *t =
1238             vlib_add_trace (vm, node, b[0], sizeof (*t));
1239           t->next_worker_index = ti[0];
1240         }
1241
1242       n_left_from -= 1;
1243       ti += 1;
1244       b += 1;
1245     }
1246   n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
1247                                          thread_indices, frame->n_vectors, 1);
1248
1249   if (n_enq < frame->n_vectors)
1250     vlib_node_increment_counter (vm, node->node_index,
1251                                  IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
1252                                  frame->n_vectors - n_enq);
1253   return frame->n_vectors;
1254 }
1255
1256 VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
1257                                                vlib_node_runtime_t * node,
1258                                                vlib_frame_t * frame)
1259 {
1260   return ip6_sv_reassembly_handoff_inline (vm, node, frame,
1261                                            false /* is_feature */ );
1262 }
1263
1264 /* *INDENT-OFF* */
1265 VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
1266   .name = "ip6-sv-reassembly-handoff",
1267   .vector_size = sizeof (u32),
1268   .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
1269   .error_strings = ip6_sv_reassembly_handoff_error_strings,
1270   .format_trace = format_ip6_sv_reassembly_handoff_trace,
1271
1272   .n_next_nodes = 1,
1273
1274   .next_nodes = {
1275     [0] = "error-drop",
1276   },
1277 };
1278
1279
1280 VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
1281                                vlib_node_runtime_t * node, vlib_frame_t * frame)
1282 {
1283   return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
1284 }
1285
1286
1287 /* *INDENT-OFF* */
1288 VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
1289   .name = "ip6-sv-reass-feature-hoff",
1290   .vector_size = sizeof (u32),
1291   .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
1292   .error_strings = ip6_sv_reassembly_handoff_error_strings,
1293   .format_trace = format_ip6_sv_reassembly_handoff_trace,
1294
1295   .n_next_nodes = 1,
1296
1297   .next_nodes = {
1298     [0] = "error-drop",
1299   },
1300 };
1301 /* *INDENT-ON* */
1302
1303 #ifndef CLIB_MARCH_VARIANT
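/*
 * Reference-counted per-interface enable/disable so that multiple callers
 * can request the feature independently; the vnet feature arc is only
 * toggled on the 0->1 and 1->0 transitions.
 */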
1304 int
1305 ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1306 {
1307   ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
1308   vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
1309   if (is_enable)
1310     {
1311       if (!rm->feature_use_refcount_per_intf[sw_if_index])
1312         {
1313           ++rm->feature_use_refcount_per_intf[sw_if_index];
1314           return vnet_feature_enable_disable ("ip6-unicast",
1315                                               "ip6-sv-reassembly-feature",
1316                                               sw_if_index, 1, 0, 0);
1317         }
1318       ++rm->feature_use_refcount_per_intf[sw_if_index];
1319     }
1320   else
1321     {
1322       --rm->feature_use_refcount_per_intf[sw_if_index];
1323       if (!rm->feature_use_refcount_per_intf[sw_if_index])
1324         return vnet_feature_enable_disable ("ip6-unicast",
1325                                             "ip6-sv-reassembly-feature",
1326                                             sw_if_index, 0, 0, 0);
1327     }
1328   return 0;
1329 }
1330 #endif
1331
1332 /*
1333  * fd.io coding-style-patch-verification: ON
1334  *
1335  * Local Variables:
1336  * eval: (c-set-style "gnu")
1337  * End:
1338  */