/* Add extern to *_main global variable declarations in header files. */
/* [vpp.git] / src / vnet / srp / node.c */
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * node.c: srp packet processing
17  *
18  * Copyright (c) 2011 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vlib/vlib.h>
41 #include <vnet/ip/ip_packet.h>  /* for ip_csum_fold */
42 #include <vnet/srp/srp.h>
43
/* Single global instance of SRP protocol state (interface pool, config). */
srp_main_t srp_main;
45
/* Per-packet trace record: the first 32 bytes of the packet, enough to
   format the SRP header in the trace output. */
typedef struct {
  u8 packet_data[32];
} srp_input_trace_t;
49
50 static u8 * format_srp_input_trace (u8 * s, va_list * va)
51 {
52   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
53   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
54   srp_input_trace_t * t = va_arg (*va, srp_input_trace_t *);
55
56   s = format (s, "%U", format_srp_header, t->packet_data);
57
58   return s;
59 }
60
/* Next-node indices for srp-input; must stay in sync with
   srp_input_node.next_nodes below. */
typedef enum {
  SRP_INPUT_NEXT_ERROR,
  SRP_INPUT_NEXT_ETHERNET_INPUT,
  SRP_INPUT_NEXT_CONTROL,
  SRP_INPUT_N_NEXT,
} srp_input_next_t;
67
/* Per-SRP-mode packet disposition: which next node to enqueue to, how many
   bytes to advance past the SRP header, and which error counter to credit. */
typedef struct {
  u8 next_index;      /* srp_input_next_t */
  u8 buffer_advance;  /* bytes to strip before handing to the next node */
  u16 error;          /* srp_error_t counter index */
} srp_input_disposition_t;
73
/* Dispatch table indexed by the 3-bit SRP mode field: reserved modes and
   keep-alives are dropped, data goes to ethernet-input (with the SRP
   header stripped), control packets go to the srp-control node. */
static srp_input_disposition_t srp_input_disposition_by_mode[8] = {
  [SRP_MODE_reserved0] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved1] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved2] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved3] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_keep_alive] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_KEEP_ALIVE_DROPPED,
  },
  [SRP_MODE_data] = {
    .next_index = SRP_INPUT_NEXT_ETHERNET_INPUT,
    .buffer_advance = sizeof (srp_header_t),
  },
  [SRP_MODE_control_pass_to_host] = {
    .next_index = SRP_INPUT_NEXT_CONTROL,
  },
  [SRP_MODE_control_locally_buffered_for_host] = {
    .next_index = SRP_INPUT_NEXT_CONTROL,
  },
};
106
/* srp-input node function: classify each packet by its SRP mode and enqueue
   it to the matching next node (see srp_input_disposition_by_mode).  Data
   packets are re-homed onto the outer-ring sw_if_index before being handed
   to ethernet-input.  Standard dual-loop/single-loop vlib dispatch. */
static uword
srp_input (vlib_main_t * vm,
           vlib_node_runtime_t * node,
           vlib_frame_t * from_frame)
{
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  u32 n_left_from, next_index, * from, * to_next;

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node,
                                   from,
                                   n_left_from,
                                   sizeof (from[0]),
                                   sizeof (srp_input_trace_t));

  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: process two packets per iteration while prefetching
         the two after them. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1, sw_if_index0, sw_if_index1;
          vlib_buffer_t * b0, * b1;
          u8 next0, next1, error0, error1;
          srp_header_t * s0, * s1;
          srp_input_disposition_t * d0, * d1;
          vnet_hw_interface_t * hi0, * hi1;
          srp_interface_t * si0, * si1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * b2, * b3;

            b2 = vlib_get_buffer (vm, from[2]);
            b3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (b2, LOAD);
            vlib_prefetch_buffer_header (b3, LOAD);

            CLIB_PREFETCH (b2->data, sizeof (srp_header_t), LOAD);
            CLIB_PREFETCH (b3->data, sizeof (srp_header_t), LOAD);
          }

          bi0 = from[0];
          bi1 = from[1];
          to_next[0] = bi0;
          to_next[1] = bi1;
          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          s0 = (void *) (b0->data + b0->current_data);
          s1 = (void *) (b1->data + b1->current_data);

          /* Data packets are always assigned to side A (outer ring) interface. */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];

          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
          hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1);

          si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);
          si1 = pool_elt_at_index (sm->interface_pool, hi1->hw_instance);

          sw_if_index0 = (s0->mode == SRP_MODE_data
                          ? si0->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index0);
          sw_if_index1 = (s1->mode == SRP_MODE_data
                          ? si1->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index1);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
          vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;

          /* Look up next node / advance / error by the packet's SRP mode. */
          d0 = srp_input_disposition_by_mode + s0->mode;
          d1 = srp_input_disposition_by_mode + s1->mode;

          next0 = d0->next_index;
          next1 = d1->next_index;

          error0 = d0->error;
          error1 = d1->error;

          vlib_buffer_advance (b0, d0->buffer_advance);
          vlib_buffer_advance (b1, d1->buffer_advance);

          b0->error = node->errors[error0];
          b1->error = node->errors[error1];

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      /* Single loop: same logic for the remaining packets. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0, sw_if_index0;
          vlib_buffer_t * b0;
          u8 next0, error0;
          srp_header_t * s0;
          srp_input_disposition_t * d0;
          srp_interface_t * si0;
          vnet_hw_interface_t * hi0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          s0 = (void *) (b0->data + b0->current_data);

          /* Data packets are always assigned to side A (outer ring) interface. */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);

          si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);

          sw_if_index0 = (s0->mode == SRP_MODE_data
                          ? si0->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index0);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;

          d0 = srp_input_disposition_by_mode + s0->mode;

          next0 = d0->next_index;

          error0 = d0->error;

          vlib_buffer_advance (b0, d0->buffer_advance);

          b0->error = node->errors[error0];

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return from_frame->n_vectors;
}
267
/* Human-readable error strings, generated from foreach_srp_error so they
   stay in sync with the srp_error_t counter indices. */
static char * srp_error_strings[] = {
#define _(f,s) s,
  foreach_srp_error
#undef _
};
273
/* Registration for the srp-input node; registered explicitly in srp_init
   (not via VLIB_REGISTER_NODE). */
static vlib_node_registration_t srp_input_node = {
  .function = srp_input,
  .name = "srp-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_errors = SRP_N_ERROR,
  .error_strings = srp_error_strings,

  .n_next_nodes = SRP_INPUT_N_NEXT,
  .next_nodes = {
    [SRP_INPUT_NEXT_ERROR] = "error-drop",
    [SRP_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",
    [SRP_INPUT_NEXT_CONTROL] = "srp-control",
  },

  .format_buffer = format_srp_header_with_length,
  .format_trace = format_srp_input_trace,
  .unformat_buffer = unformat_srp_header,
};
294
/* Handle a received SRP topology-discovery control packet: append our own
   MAC binding to the packet, recompute the control checksum, and forward
   the updated packet out the interface it arrived on.

   contents: vector holding the full packet; may be reallocated (grown) here.
   Returns an SRP_ERROR_* code used as the buffer's error disposition. */
static uword
srp_topology_packet (vlib_main_t * vm, u32 sw_if_index, u8 ** contents)
{
  vnet_main_t * vnm = vnet_get_main();
  vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
  srp_topology_header_t * t;
  srp_topology_mac_binding_t * mb;
  u32 nb, nmb;

  t = (void *) *contents;

  /* NOTE(review): nb is read from packet data before any check that
     *contents is at least sizeof (t[0]) long — confirm callers guarantee
     a minimum length, otherwise this reads past the vector. */
  nb = clib_net_to_host_u16 (t->n_bytes_of_data_that_follows);
  nmb = (nb - sizeof (t->originator_address)) / sizeof (mb[0]);
  if (vec_len (*contents) < sizeof (t[0]) + nmb * sizeof (mb[0]))
    return SRP_ERROR_TOPOLOGY_BAD_LENGTH;

  /* Fill in our source MAC address. */
  clib_memcpy (t->ethernet.src_address, hi->hw_address, vec_len (hi->hw_address));

  /* Make space for our MAC binding. */
  vec_resize (*contents, sizeof (srp_topology_mac_binding_t));
  /* vec_resize may move the vector; reload t from the new base. */
  t = (void *) *contents;
  t->n_bytes_of_data_that_follows = clib_host_to_net_u16 (nb + sizeof (mb[0]));

  mb = t->bindings + nmb;

  mb->flags =
    ((t->srp.is_inner_ring ? SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING : 0)
     | (/* is wrapped FIXME */ 0));
  clib_memcpy (mb->address, hi->hw_address, vec_len (hi->hw_address));

  /* Recompute checksum over everything from the generic control header on. */
  t->control.checksum
    = ~ip_csum_fold (ip_incremental_checksum (0, &t->control,
                                              vec_len (*contents) - STRUCT_OFFSET_OF (srp_generic_control_header_t, control)));

  /* Copy the updated contents into a fresh buffer and transmit it on the
     same interface the packet was received on. */
  {
    vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
    vlib_buffer_t * b;
    u32 * to_next = vlib_frame_vector_args (f);
    u32 bi;

    bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
                               /* buffer to append to */ 0,
                               *contents, vec_len (*contents));
    b = vlib_get_buffer (vm, bi);
    vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
    to_next[0] = bi;
    f->n_vectors = 1;
    vlib_put_frame_to_node (vm, hi->output_node_index, f);
  }

  return SRP_ERROR_CONTROL_PACKETS_PROCESSED;
}
348
/* Signature of per-control-packet-type handlers; returns an SRP_ERROR_*
   code.  contents may be reallocated by the handler. */
typedef uword (srp_control_handler_function_t) (vlib_main_t * vm,
                                                u32 sw_if_index,
                                                u8 ** contents);
352
/* srp-control node function: validate each control packet (version must be
   zero, checksum must match), then dispatch by control type to a handler
   (currently only topology packets).  All packets end up at error-drop with
   an error code recording what happened. */
static uword
srp_control_input (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * from_frame)
{
  u32 n_left_from, next_index, * from, * to_next;
  vlib_node_runtime_t * error_node;
  /* Scratch vector reused across calls to hold the linearized packet.
     NOTE(review): function-local static — assumes single-threaded
     execution of this node; confirm for multi-worker configurations. */
  static u8 * contents;

  /* Error counters live on the srp-input node. */
  error_node = vlib_node_get_runtime (vm, srp_input_node.index);

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node,
                                   from,
                                   n_left_from,
                                   sizeof (from[0]),
                                   sizeof (srp_input_trace_t));

  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0, l2_len0, l3_len0;
          vlib_buffer_t * b0;
          u8 next0, error0;
          srp_generic_control_header_t * s0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          s0 = (void *) (b0->data + b0->current_data);
          l2_len0 = vlib_buffer_length_in_chain (vm, b0);
          /* Length from the generic control header onward. */
          l3_len0 = l2_len0 - STRUCT_OFFSET_OF (srp_generic_control_header_t, control);

          error0 = SRP_ERROR_CONTROL_PACKETS_PROCESSED;

          error0 = s0->control.version != 0 ? SRP_ERROR_CONTROL_VERSION_NON_ZERO : error0;

          /* Verify checksum by zeroing the field and recomputing; the
             packet's checksum field is left zeroed afterwards. */
          {
            u16 save0 = s0->control.checksum;
            u16 computed0;
            s0->control.checksum = 0;
            computed0 = ~ip_csum_fold (ip_incremental_checksum (0, &s0->control, l3_len0));
            error0 = save0 != computed0 ? SRP_ERROR_CONTROL_BAD_CHECKSUM : error0;
          }

          if (error0 == SRP_ERROR_CONTROL_PACKETS_PROCESSED)
            {
              /* Dispatch table by control packet type. */
              static srp_control_handler_function_t * t[SRP_N_CONTROL_PACKET_TYPE] = {
                [SRP_CONTROL_PACKET_TYPE_topology] = srp_topology_packet,
              };
              srp_control_handler_function_t * f;

              f = 0;
              if (s0->control.type < ARRAY_LEN (t))
                f = t[s0->control.type];

              if (f)
                {
                  /* Linearize the (possibly chained) buffer before handing
                     it to the handler. */
                  vec_validate (contents, l2_len0 - 1);
                  vlib_buffer_contents (vm, bi0, contents);
                  error0 = f (vm, vnet_buffer (b0)->sw_if_index[VLIB_RX], &contents);
                }
              else
                error0 = SRP_ERROR_UNKNOWN_CONTROL;
            }

          b0->error = error_node->errors[error0];
          /* Single next node: error-drop. */
          next0 = 0;

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return from_frame->n_vectors;
}
448
/* Registration for the srp-control node; all packets are dropped after
   processing (single next node: error-drop). */
static vlib_node_registration_t srp_control_input_node = {
  .function = srp_control_input,
  .name = "srp-control",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },

  .format_buffer = format_srp_header_with_length,
  .format_trace = format_srp_input_trace,
  .unformat_buffer = unformat_srp_header,
};
464
465 static u8 * format_srp_ips_request_type (u8 * s, va_list * args)
466 {
467   u32 x = va_arg (*args, u32);
468   char * t = 0;
469   switch (x)
470     {
471 #define _(f,n) case SRP_IPS_REQUEST_##f: t = #f; break;
472       foreach_srp_ips_request_type
473 #undef _
474     default:
475       return format (s, "unknown 0x%x", x);
476     }
477   return format (s, "%U", format_c_identifier, t);
478 }
479
480 static u8 * format_srp_ips_status (u8 * s, va_list * args)
481 {
482   u32 x = va_arg (*args, u32);
483   char * t = 0;
484   switch (x)
485     {
486 #define _(f,n) case SRP_IPS_STATUS_##f: t = #f; break;
487       foreach_srp_ips_status
488 #undef _
489     default:
490       return format (s, "unknown 0x%x", x);
491     }
492   return format (s, "%U", format_c_identifier, t);
493 }
494
495 static u8 * format_srp_ips_state (u8 * s, va_list * args)
496 {
497   u32 x = va_arg (*args, u32);
498   char * t = 0;
499   switch (x)
500     {
501 #define _(f) case SRP_IPS_STATE_##f: t = #f; break;
502       foreach_srp_ips_state
503 #undef _
504     default:
505       return format (s, "unknown 0x%x", x);
506     }
507   return format (s, "%U", format_c_identifier, t);
508 }
509
510 static u8 * format_srp_ring (u8 * s, va_list * args)
511 {
512   u32 ring = va_arg (*args, u32);
513   return format (s, "%s", ring == SRP_RING_INNER ? "inner" : "outer");
514 }
515
516 static u8 * format_srp_ips_header (u8 * s, va_list * args)
517 {
518   srp_ips_header_t * h = va_arg (*args, srp_ips_header_t *);
519
520   s = format (s, "%U, %U, %U, %s-path",
521               format_srp_ips_request_type, h->request_type,
522               format_ethernet_address, h->originator_address,
523               format_srp_ips_status, h->status,
524               h->is_long_path ? "long" : "short");
525
526   return s;
527 }
528
529 static u8 * format_srp_interface (u8 * s, va_list * args)
530 {
531   srp_interface_t * si = va_arg (*args, srp_interface_t *);
532   srp_interface_ring_t * ir;
533
534   s = format (s, "address %U, IPS state %U",
535               format_ethernet_address, si->my_address,
536               format_srp_ips_state, si->current_ips_state);
537   for (ir = si->rings; ir < si->rings + SRP_N_RING; ir++)
538     if (ir->rx_neighbor_address_valid)
539       s = format (s, ", %U neighbor %U",
540                   format_srp_ring, ir->ring,
541                   format_ethernet_address, ir->rx_neighbor_address);
542
543   return s;
544 }
545
546 u8 * format_srp_device (u8 * s, va_list * args)
547 {
548   u32 hw_if_index = va_arg (*args, u32);
549   CLIB_UNUSED (int verbose) = va_arg (*args, int);
550   vnet_main_t * vnm = vnet_get_main();
551   srp_main_t * sm = &srp_main;
552   vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index);
553   srp_interface_t * si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);
554   return format (s, "%U", format_srp_interface, si);
555 }
556
557 always_inline srp_interface_t *
558 srp_get_interface (u32 sw_if_index, srp_ring_type_t * ring)
559 {
560   vnet_main_t * vnm = vnet_get_main();
561   srp_main_t * sm = &srp_main;
562   vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
563   srp_interface_t * si;
564
565   ASSERT (hi->hw_class_index == srp_hw_interface_class.index);
566   si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);
567
568   ASSERT (si->rings[SRP_RING_INNER].hw_if_index == hi->hw_if_index
569           || si->rings[SRP_RING_OUTER].hw_if_index == hi->hw_if_index);
570   if (ring)
571     *ring =
572       (hi->hw_if_index == si->rings[SRP_RING_INNER].hw_if_index
573        ? SRP_RING_INNER
574        : SRP_RING_OUTER);
575
576   return si;
577 }
578
579 static void init_ips_packet (srp_interface_t * si,
580                              srp_ring_type_t tx_ring,
581                              srp_ips_header_t * i)
582 {
583   memset (i, 0, sizeof (i[0]));
584
585   i->srp.ttl = 1;
586   i->srp.is_inner_ring = tx_ring;
587   i->srp.priority = 7;
588   i->srp.mode = SRP_MODE_control_locally_buffered_for_host;
589   srp_header_compute_parity (&i->srp);
590
591   clib_memcpy (&i->ethernet.src_address, &si->my_address, sizeof (si->my_address));
592   i->ethernet.type = clib_host_to_net_u16 (ETHERNET_TYPE_SRP_CONTROL);
593
594   /* Checksum will be filled in later. */
595   i->control.version = 0;
596   i->control.type = SRP_CONTROL_PACKET_TYPE_ips;
597   i->control.ttl = 255;
598
599   clib_memcpy (&i->originator_address, &si->my_address, sizeof (si->my_address));
600 }
601
/* Transmit an IPS packet on the given ring: compute the control checksum,
   copy the header into a fresh buffer, and hand it directly to the
   interface's output node.  Silently does nothing if the interface is
   admin-down or is not (or no longer) an SRP interface. */
static void tx_ips_packet (srp_interface_t * si,
                           srp_ring_type_t tx_ring,
                           srp_ips_header_t * i)
{
  srp_main_t * sm = &srp_main;
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = sm->vlib_main;
  vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si->rings[tx_ring].hw_if_index);
  vlib_frame_t * f;
  vlib_buffer_t * b;
  u32 * to_next, bi;

  if (! vnet_sw_interface_is_admin_up (vnm, hi->sw_if_index))
    return;
  if (hi->hw_class_index != srp_hw_interface_class.index)
    return;

  /* Checksum covers everything from the generic control header onward. */
  i->control.checksum
    = ~ip_csum_fold (ip_incremental_checksum (0, &i->control,
                                              sizeof (i[0]) - STRUCT_OFFSET_OF (srp_ips_header_t, control)));

  bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
                             /* buffer to append to */ 0,
                             i, sizeof (i[0]));

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U",
                  format_vnet_sw_if_index_name, vnm, hi->sw_if_index,
                  format_srp_ips_header, i);

  b = vlib_get_buffer (vm, bi);
  vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = hi->sw_if_index;

  /* Enqueue the single-packet frame straight to the output node. */
  f = vlib_get_frame_to_node (vm, hi->output_node_index);
  to_next = vlib_frame_vector_args (f);
  to_next[0] = bi;
  f->n_vectors = 1;
  vlib_put_frame_to_node (vm, hi->output_node_index, f);
}
642
/* Serialize one interface's IPS state for multicast distribution.
   Wire order: interface pool index, IPS state, then per ring the
   neighbor-address-valid flag (+ address if set) and the
   waiting-to-restore flag (+ start time if set).  Must stay in sync
   with unserialize_srp_interface_state_msg below. */
static void serialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
{
  srp_interface_t * si = va_arg (*va, srp_interface_t *);
  srp_main_t * sm = &srp_main;
  int r;

  ASSERT (! pool_is_free (sm->interface_pool, si));
  serialize_integer (m, si - sm->interface_pool, sizeof (u32));
  serialize_likely_small_unsigned_integer (m, si->current_ips_state);
  for (r = 0; r < SRP_N_RING; r++)
    {
      srp_interface_ring_t * ir = &si->rings[r];
      void * p;
      serialize_likely_small_unsigned_integer (m, ir->rx_neighbor_address_valid);
      if (ir->rx_neighbor_address_valid)
        {
          p = serialize_get (m, sizeof (ir->rx_neighbor_address));
          clib_memcpy (p, ir->rx_neighbor_address, sizeof (ir->rx_neighbor_address));
        }
      serialize_likely_small_unsigned_integer (m, ir->waiting_to_restore);
      if (ir->waiting_to_restore)
        serialize (m, serialize_f64, ir->wait_to_restore_start_time);
    }
}
667
/* Inverse of serialize_srp_interface_state_msg: apply a received IPS state
   update to the local interface pool.  Field order must match exactly. */
static void unserialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
{
  CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *);
  srp_main_t * sm = &srp_main;
  srp_interface_t * si;
  u32 si_index, r;

  unserialize_integer (m, &si_index, sizeof (u32));
  /* NOTE(review): si_index comes off the wire; pool_elt_at_index asserts
     (debug builds) rather than validates — assumes peers are trusted. */
  si = pool_elt_at_index (sm->interface_pool, si_index);
  si->current_ips_state = unserialize_likely_small_unsigned_integer (m);
  for (r = 0; r < SRP_N_RING; r++)
    {
      srp_interface_ring_t * ir = &si->rings[r];
      void * p;
      ir->rx_neighbor_address_valid = unserialize_likely_small_unsigned_integer (m);
      if (ir->rx_neighbor_address_valid)
        {
          p = unserialize_get (m, sizeof (ir->rx_neighbor_address));
          clib_memcpy (ir->rx_neighbor_address, p, sizeof (ir->rx_neighbor_address));
        }
      ir->waiting_to_restore = unserialize_likely_small_unsigned_integer (m);
      if (ir->waiting_to_restore)
        unserialize (m, unserialize_f64, &ir->wait_to_restore_start_time);
    }
}
693
/* Multicast message descriptor used to broadcast IPS state changes. */
MC_SERIALIZE_MSG (srp_interface_state_msg, static) = {
  .name = "vnet_srp_interface_state",
  .serialize = serialize_srp_interface_state_msg,
  .unserialize = unserialize_srp_interface_state_msg,
};
699
700 static int requests_switch (srp_ips_request_type_t r)
701 {
702   static u8 t[16] = {
703     [SRP_IPS_REQUEST_forced_switch] = 1,
704     [SRP_IPS_REQUEST_manual_switch] = 1,
705     [SRP_IPS_REQUEST_signal_fail] = 1,
706     [SRP_IPS_REQUEST_signal_degrade] = 1,
707   };
708   return (int) r < ARRAY_LEN (t) ? t[r] : 0;
709 }
710
/* Called when an IPS control packet is received on given interface. */
/* IPS (Intelligent Protection Switching) receive-side state machine:
   learns neighbor addresses from short-path messages and moves the
   interface between idle and wrapped states, broadcasting state changes
   to multicast peers when running under mc. */
void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * h)
{
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = srp_main.vlib_main;
  srp_ring_type_t rx_ring;
  srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
  srp_interface_ring_t * ir = &si->rings[rx_ring];
  int si_needs_broadcast = 0;

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U %U",
                  format_time_interval, "h:m:s:u", vlib_time_now (vm),
                  format_vnet_sw_if_index_name, vnm, sw_if_index,
                  format_srp_ips_header, h);

  /* Ignore self-generated IPS packets. */
  if (! memcmp (h->originator_address, si->my_address, sizeof (h->originator_address)))
    goto done;

  /* Learn neighbor address from short path messages. */
  if (! h->is_long_path)
    {
      /* A previously-learned neighbor changing address is unexpected. */
      if (ir->rx_neighbor_address_valid
          && memcmp (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address)))
        {
          ASSERT (0);
        }
      ir->rx_neighbor_address_valid = 1;
      clib_memcpy (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address));
    }

  switch (si->current_ips_state)
    {
    case SRP_IPS_STATE_idle:
      /* Received {REQ,NEIGHBOR,W,S} in idle state: wrap. */
      if (requests_switch (h->request_type)
          && ! h->is_long_path
          && h->status == SRP_IPS_STATUS_wrapped)
        {
          srp_ips_header_t to_tx[2];

          si_needs_broadcast = 1;
          si->current_ips_state = SRP_IPS_STATE_wrapped;
          /* Enable hardware wrap on both sides of the ring. */
          si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 1);
          si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 1);

          /* Short-path reply back toward the requester ... */
          init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
          to_tx[0].request_type = SRP_IPS_REQUEST_idle;
          to_tx[0].status = SRP_IPS_STATUS_wrapped;
          to_tx[0].is_long_path = 0;
          tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);

          /* ... and long-path notification around the other ring. */
          init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
          to_tx[1].request_type = h->request_type;
          to_tx[1].status = SRP_IPS_STATUS_wrapped;
          to_tx[1].is_long_path = 1;
          tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
        }
      break;

    case SRP_IPS_STATE_wrapped:
      /* Neighbor reports idle on the short path: unwrap. */
      if (! h->is_long_path
          && h->request_type == SRP_IPS_REQUEST_idle
          && h->status == SRP_IPS_STATUS_idle)
        {
          si_needs_broadcast = 1;
          si->current_ips_state = SRP_IPS_STATE_idle;
          si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 0);
          si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 0);
        }
      break;

    case SRP_IPS_STATE_pass_thru:
      /* FIXME */
      break;

    default:
      abort ();
      break;
    }

 done:
  /* Propagate state change to multicast peers, if mc is configured. */
  if (vm->mc_main && si_needs_broadcast)
    mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
}
798
/* Perform local IPS request on given interface. */
/* Handle a locally-originated IPS request (e.g. from CLI or link events).
   wait_to_restore starts the restore timer on a wrapped ring; any other
   request cancels it.  State changes are broadcast to mc peers. */
void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request)
{
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  vlib_main_t * vm = sm->vlib_main;
  srp_ring_type_t rx_ring;
  srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
  srp_interface_ring_t * ir = &si->rings[rx_ring];
  int si_needs_broadcast = 0;

  if (request == SRP_IPS_REQUEST_wait_to_restore)
    {
      /* Only meaningful while wrapped; start the timer once. */
      if (si->current_ips_state != SRP_IPS_STATE_wrapped)
        return;
      if (! ir->waiting_to_restore)
        {
          ir->wait_to_restore_start_time = vlib_time_now (sm->vlib_main);
          ir->waiting_to_restore = 1;
          si_needs_broadcast = 1;
        }
    }
  else
    {
      /* FIXME handle local signal fail. */
      si_needs_broadcast = ir->waiting_to_restore;
      ir->wait_to_restore_start_time = 0;
      ir->waiting_to_restore = 0;
    }

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U",
                  format_vnet_sw_if_index_name, vnm, sw_if_index,
                  format_srp_ips_request_type, request);

  if (vm->mc_main && si_needs_broadcast)
    mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
}
838
/* Periodic IPS housekeeping for one interface: unwrap when both rings'
   wait-to-restore timers have expired, then (re)announce idle state on
   both rings. */
static void maybe_send_ips_message (srp_interface_t * si)
{
  srp_main_t * sm = &srp_main;
  srp_ips_header_t to_tx[2];
  srp_ring_type_t rx_ring = SRP_RING_OUTER;
  srp_interface_ring_t * r0 = &si->rings[rx_ring ^ 0];
  srp_interface_ring_t * r1 = &si->rings[rx_ring ^ 1];
  f64 now = vlib_time_now (sm->vlib_main);

  if (! si->ips_process_enable)
    return;

  /* Wrapped and both restore timers expired: go back to idle. */
  if (si->current_ips_state == SRP_IPS_STATE_wrapped
      && r0->waiting_to_restore
      && r1->waiting_to_restore
      && now >= r0->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay
      && now >= r1->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay)
    {
      si->current_ips_state = SRP_IPS_STATE_idle;
      r0->waiting_to_restore = r1->waiting_to_restore = 0;
      r0->wait_to_restore_start_time = r1->wait_to_restore_start_time = 0;
    }

  if (si->current_ips_state != SRP_IPS_STATE_idle)
    return;

  init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
  init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);

  /* Always true after the early return above. */
  if (si->current_ips_state == SRP_IPS_STATE_idle)
    {
      to_tx[0].request_type = to_tx[1].request_type = SRP_IPS_REQUEST_idle;
      to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_idle;
      to_tx[0].is_long_path = to_tx[1].is_long_path = 0;
    }

  /* NOTE(review): unreachable — the early return above filters out every
     non-idle state, including wrapped.  Possibly the return was meant to
     allow wrapped through; confirm intent before changing. */
  else if (si->current_ips_state == SRP_IPS_STATE_wrapped)
    {
      to_tx[0].request_type =
        (si->rings[rx_ring ^ 0].waiting_to_restore
         ? SRP_IPS_REQUEST_wait_to_restore
         : SRP_IPS_REQUEST_signal_fail);
      to_tx[1].request_type =
        (si->rings[rx_ring ^ 1].waiting_to_restore
         ? SRP_IPS_REQUEST_wait_to_restore
         : SRP_IPS_REQUEST_signal_fail);
      to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_wrapped;
      to_tx[0].is_long_path = 0;
      to_tx[1].is_long_path = 1;
    }

  tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
  tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
}
893
/* IPS background process: once per second, run IPS housekeeping on every
   SRP interface.  Never returns. */
static uword
srp_ips_process (vlib_main_t * vm,
                 vlib_node_runtime_t * rt,
                 vlib_frame_t * f)
{
  srp_main_t * sm = &srp_main;
  srp_interface_t * si;

  while (1)
    {
      pool_foreach (si, sm->interface_pool, ({
        maybe_send_ips_message (si);
      }));
      vlib_process_suspend (vm, 1.0);
    }

  /* Not reached. */
  return 0;
}
912
/* IPS process node; starts disabled and is presumably enabled when an SRP
   interface is configured (enable path not visible in this file). */
vlib_node_registration_t srp_ips_process_node = {
    .function = srp_ips_process,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "srp-ips-process",
    .state = VLIB_NODE_STATE_DISABLED,
};
919
/* Module init: set defaults and register the SRP nodes.  Nodes are
   registered here (rather than via VLIB_REGISTER_NODE) so registration
   order is explicit. */
static clib_error_t * srp_init (vlib_main_t * vm)
{
  srp_main_t * sm = &srp_main;

  sm->default_data_ttl = 255;
  sm->vlib_main = vm;
  vlib_register_node (vm, &srp_ips_process_node);
  vlib_register_node (vm, &srp_input_node);
  vlib_register_node (vm, &srp_control_input_node);
  srp_setup_node (vm, srp_input_node.index);

  return 0;
}

VLIB_INIT_FUNCTION (srp_init);