/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * node.c: srp packet processing
 *
 * Copyright (c) 2011 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <vlib/vlib.h>
#include <vnet/ip/ip_packet.h>  /* for ip_csum_fold */
#include <vnet/srp/srp.h>

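/* Per-packet trace record: the first 32 bytes of each packet are captured and
   later rendered by format_srp_input_trace via format_srp_header. */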
typedef struct {
  u8 packet_data[32];
} srp_input_trace_t;

static u8 * format_srp_input_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  srp_input_trace_t * t = va_arg (*va, srp_input_trace_t *);

  s = format (s, "%U", format_srp_header, t->packet_data);

  return s;
}

typedef enum {
  SRP_INPUT_NEXT_ERROR,
  SRP_INPUT_NEXT_ETHERNET_INPUT,
  SRP_INPUT_NEXT_CONTROL,
  SRP_INPUT_N_NEXT,
} srp_input_next_t;

typedef struct {
  u8 next_index;
  u8 buffer_advance;
  u16 error;
} srp_input_disposition_t;

static srp_input_disposition_t srp_input_disposition_by_mode[8] = {
  [SRP_MODE_reserved0] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved1] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved2] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_reserved3] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_UNKNOWN_MODE,
  },
  [SRP_MODE_keep_alive] = {
    .next_index = SRP_INPUT_NEXT_ERROR,
    .error = SRP_ERROR_KEEP_ALIVE_DROPPED,
  },
  [SRP_MODE_data] = {
    .next_index = SRP_INPUT_NEXT_ETHERNET_INPUT,
    .buffer_advance = sizeof (srp_header_t),
  },
  [SRP_MODE_control_pass_to_host] = {
    .next_index = SRP_INPUT_NEXT_CONTROL,
  },
  [SRP_MODE_control_locally_buffered_for_host] = {
    .next_index = SRP_INPUT_NEXT_CONTROL,
  },
};
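
/*
 * Illustrative note (not part of the original file): the table above is
 * indexed by the SRP mode field and drives the per-packet dispatch in
 * srp-input below, essentially
 *
 *   d0 = srp_input_disposition_by_mode + s0->mode;
 *   next0 = d0->next_index;
 *   vlib_buffer_advance (b0, d0->buffer_advance);
 *   b0->error = node->errors[d0->error];
 *
 * Data frames are advanced past the SRP header and handed to ethernet-input;
 * keep-alives and reserved modes are counted against their error and dropped.
 */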
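
/* srp-input: classify received SRP frames by mode, remap the RX sw_if_index
   of data packets to the outer-ring (side A) interface, and enqueue each
   buffer to the next node chosen from the disposition table above. */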
static uword
srp_input (vlib_main_t * vm,
           vlib_node_runtime_t * node,
           vlib_frame_t * from_frame)
{
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  u32 n_left_from, next_index, * from, * to_next;

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node,
                                   from,
                                   n_left_from,
                                   sizeof (from[0]),
                                   sizeof (srp_input_trace_t));

  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1, sw_if_index0, sw_if_index1;
          vlib_buffer_t * b0, * b1;
          u8 next0, next1, error0, error1;
          srp_header_t * s0, * s1;
          srp_input_disposition_t * d0, * d1;
          vnet_hw_interface_t * hi0, * hi1;
          srp_interface_t * si0, * si1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * b2, * b3;

            b2 = vlib_get_buffer (vm, from[2]);
            b3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (b2, LOAD);
            vlib_prefetch_buffer_header (b3, LOAD);

            CLIB_PREFETCH (b2->data, sizeof (srp_header_t), LOAD);
            CLIB_PREFETCH (b3->data, sizeof (srp_header_t), LOAD);
          }

          bi0 = from[0];
          bi1 = from[1];
          to_next[0] = bi0;
          to_next[1] = bi1;
          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          s0 = (void *) (b0->data + b0->current_data);
          s1 = (void *) (b1->data + b1->current_data);

          /* Data packets are always assigned to side A (outer ring) interface. */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];

          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
          hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1);

          si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);
          si1 = pool_elt_at_index (sm->interface_pool, hi1->hw_instance);

          sw_if_index0 = (s0->mode == SRP_MODE_data
                          ? si0->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index0);
          sw_if_index1 = (s1->mode == SRP_MODE_data
                          ? si1->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index1);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
          vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;

          d0 = srp_input_disposition_by_mode + s0->mode;
          d1 = srp_input_disposition_by_mode + s1->mode;

          next0 = d0->next_index;
          next1 = d1->next_index;

          error0 = d0->error;
          error1 = d1->error;

          vlib_buffer_advance (b0, d0->buffer_advance);
          vlib_buffer_advance (b1, d1->buffer_advance);

          b0->error = node->errors[error0];
          b1->error = node->errors[error1];

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0, sw_if_index0;
          vlib_buffer_t * b0;
          u8 next0, error0;
          srp_header_t * s0;
          srp_input_disposition_t * d0;
          srp_interface_t * si0;
          vnet_hw_interface_t * hi0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          s0 = (void *) (b0->data + b0->current_data);

          /* Data packets are always assigned to side A (outer ring) interface. */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);

          si0 = pool_elt_at_index (sm->interface_pool, hi0->hw_instance);

          sw_if_index0 = (s0->mode == SRP_MODE_data
                          ? si0->rings[SRP_RING_OUTER].sw_if_index
                          : sw_if_index0);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;

          d0 = srp_input_disposition_by_mode + s0->mode;

          next0 = d0->next_index;

          error0 = d0->error;

          vlib_buffer_advance (b0, d0->buffer_advance);

          b0->error = node->errors[error0];

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return from_frame->n_vectors;
}

static char * srp_error_strings[] = {
#define _(f,s) s,
  foreach_srp_error
#undef _
};

static vlib_node_registration_t srp_input_node = {
  .function = srp_input,
  .name = "srp-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_errors = SRP_N_ERROR,
  .error_strings = srp_error_strings,

  .n_next_nodes = SRP_INPUT_N_NEXT,
  .next_nodes = {
    [SRP_INPUT_NEXT_ERROR] = "error-drop",
    [SRP_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input",
    [SRP_INPUT_NEXT_CONTROL] = "srp-control",
  },

  .format_buffer = format_srp_header_with_length,
  .format_trace = format_srp_input_trace,
  .unformat_buffer = unformat_srp_header,
};

static uword
srp_topology_packet (vlib_main_t * vm, u32 sw_if_index, u8 ** contents)
{
  vnet_main_t * vnm = vnet_get_main();
  vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
  srp_topology_header_t * t;
  srp_topology_mac_binding_t * mb;
  u32 nb, nmb;

  t = (void *) *contents;

  nb = clib_net_to_host_u16 (t->n_bytes_of_data_that_follows);
  nmb = (nb - sizeof (t->originator_address)) / sizeof (mb[0]);
  if (vec_len (*contents) < sizeof (t[0]) + nmb * sizeof (mb[0]))
    return SRP_ERROR_TOPOLOGY_BAD_LENGTH;

  /* Fill in our source MAC address. */
  clib_memcpy (t->ethernet.src_address, hi->hw_address, vec_len (hi->hw_address));

  /* Make space for our MAC binding. */
  vec_resize (*contents, sizeof (srp_topology_mac_binding_t));
  t = (void *) *contents;
  t->n_bytes_of_data_that_follows = clib_host_to_net_u16 (nb + sizeof (mb[0]));

  mb = t->bindings + nmb;

  mb->flags =
    ((t->srp.is_inner_ring ? SRP_TOPOLOGY_MAC_BINDING_FLAG_IS_INNER_RING : 0)
     | (/* is wrapped FIXME */ 0));
  clib_memcpy (mb->address, hi->hw_address, vec_len (hi->hw_address));

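  /* Recompute the control checksum over everything from the control header to
     the end of the (now longer) packet; tx_ips_packet below uses the same
     scheme for IPS packets. */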
  t->control.checksum
    = ~ip_csum_fold (ip_incremental_checksum (0, &t->control,
                                              vec_len (*contents) - STRUCT_OFFSET_OF (srp_generic_control_header_t, control)));

  {
    vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
    vlib_buffer_t * b;
    u32 * to_next = vlib_frame_vector_args (f);
    u32 bi;

    bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
                               /* buffer to append to */ 0,
                               *contents, vec_len (*contents));
    b = vlib_get_buffer (vm, bi);
    vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
    to_next[0] = bi;
    f->n_vectors = 1;
    vlib_put_frame_to_node (vm, hi->output_node_index, f);
  }

  return SRP_ERROR_CONTROL_PACKETS_PROCESSED;
}

typedef uword (srp_control_handler_function_t) (vlib_main_t * vm,
                                                u32 sw_if_index,
                                                u8 ** contents);

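/*
 * srp-control: validate the version and checksum of SRP control packets, then
 * dispatch on control.type through the small handler table inside
 * srp_control_input; only topology packets currently have a handler
 * (srp_topology_packet above).  A handler for another control type would be
 * registered the same way, e.g. (illustrative only; srp_ips_control_packet is
 * a hypothetical function, not part of this file):
 *
 *   [SRP_CONTROL_PACKET_TYPE_ips] = srp_ips_control_packet,
 */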
static uword
srp_control_input (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * from_frame)
{
  u32 n_left_from, next_index, * from, * to_next;
  vlib_node_runtime_t * error_node;
  static u8 * contents;

  error_node = vlib_node_get_runtime (vm, srp_input_node.index);

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node,
                                   from,
                                   n_left_from,
                                   sizeof (from[0]),
                                   sizeof (srp_input_trace_t));

  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0, l2_len0, l3_len0;
          vlib_buffer_t * b0;
          u8 next0, error0;
          srp_generic_control_header_t * s0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          s0 = (void *) (b0->data + b0->current_data);
          l2_len0 = vlib_buffer_length_in_chain (vm, b0);
          l3_len0 = l2_len0 - STRUCT_OFFSET_OF (srp_generic_control_header_t, control);

          error0 = SRP_ERROR_CONTROL_PACKETS_PROCESSED;

          error0 = s0->control.version != 0 ? SRP_ERROR_CONTROL_VERSION_NON_ZERO : error0;

          {
            u16 save0 = s0->control.checksum;
            u16 computed0;
            s0->control.checksum = 0;
            computed0 = ~ip_csum_fold (ip_incremental_checksum (0, &s0->control, l3_len0));
            error0 = save0 != computed0 ? SRP_ERROR_CONTROL_BAD_CHECKSUM : error0;
          }

          if (error0 == SRP_ERROR_CONTROL_PACKETS_PROCESSED)
            {
              static srp_control_handler_function_t * t[SRP_N_CONTROL_PACKET_TYPE] = {
                [SRP_CONTROL_PACKET_TYPE_topology] = srp_topology_packet,
              };
              srp_control_handler_function_t * f;

              f = 0;
              if (s0->control.type < ARRAY_LEN (t))
                f = t[s0->control.type];

              if (f)
                {
                  vec_validate (contents, l2_len0 - 1);
                  vlib_buffer_contents (vm, bi0, contents);
                  error0 = f (vm, vnet_buffer (b0)->sw_if_index[VLIB_RX], &contents);
                }
              else
                error0 = SRP_ERROR_UNKNOWN_CONTROL;
            }

          b0->error = error_node->errors[error0];
          next0 = 0;

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return from_frame->n_vectors;
}

static vlib_node_registration_t srp_control_input_node = {
  .function = srp_control_input,
  .name = "srp-control",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },

  .format_buffer = format_srp_header_with_length,
  .format_trace = format_srp_input_trace,
  .unformat_buffer = unformat_srp_header,
};

static u8 * format_srp_ips_request_type (u8 * s, va_list * args)
{
  u32 x = va_arg (*args, u32);
  char * t = 0;
  switch (x)
    {
#define _(f,n) case SRP_IPS_REQUEST_##f: t = #f; break;
      foreach_srp_ips_request_type
#undef _
    default:
      return format (s, "unknown 0x%x", x);
    }
  return format (s, "%U", format_c_identifier, t);
}

static u8 * format_srp_ips_status (u8 * s, va_list * args)
{
  u32 x = va_arg (*args, u32);
  char * t = 0;
  switch (x)
    {
#define _(f,n) case SRP_IPS_STATUS_##f: t = #f; break;
      foreach_srp_ips_status
#undef _
    default:
      return format (s, "unknown 0x%x", x);
    }
  return format (s, "%U", format_c_identifier, t);
}

static u8 * format_srp_ips_state (u8 * s, va_list * args)
{
  u32 x = va_arg (*args, u32);
  char * t = 0;
  switch (x)
    {
#define _(f) case SRP_IPS_STATE_##f: t = #f; break;
      foreach_srp_ips_state
#undef _
    default:
      return format (s, "unknown 0x%x", x);
    }
  return format (s, "%U", format_c_identifier, t);
}

static u8 * format_srp_ring (u8 * s, va_list * args)
{
  u32 ring = va_arg (*args, u32);
  return format (s, "%s", ring == SRP_RING_INNER ? "inner" : "outer");
}

static u8 * format_srp_ips_header (u8 * s, va_list * args)
{
  srp_ips_header_t * h = va_arg (*args, srp_ips_header_t *);

  s = format (s, "%U, %U, %U, %s-path",
              format_srp_ips_request_type, h->request_type,
              format_ethernet_address, h->originator_address,
              format_srp_ips_status, h->status,
              h->is_long_path ? "long" : "short");

  return s;
}

static u8 * format_srp_interface (u8 * s, va_list * args)
{
  srp_interface_t * si = va_arg (*args, srp_interface_t *);
  srp_interface_ring_t * ir;

  s = format (s, "address %U, IPS state %U",
              format_ethernet_address, si->my_address,
              format_srp_ips_state, si->current_ips_state);
  for (ir = si->rings; ir < si->rings + SRP_N_RING; ir++)
    if (ir->rx_neighbor_address_valid)
      s = format (s, ", %U neighbor %U",
                  format_srp_ring, ir->ring,
                  format_ethernet_address, ir->rx_neighbor_address);

  return s;
}

u8 * format_srp_device (u8 * s, va_list * args)
{
  u32 hw_if_index = va_arg (*args, u32);
  CLIB_UNUSED (int verbose) = va_arg (*args, int);
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, hw_if_index);
  srp_interface_t * si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);
  return format (s, "%U", format_srp_interface, si);
}

always_inline srp_interface_t *
srp_get_interface (u32 sw_if_index, srp_ring_type_t * ring)
{
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  vnet_hw_interface_t * hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
  srp_interface_t * si;

  ASSERT (hi->hw_class_index == srp_hw_interface_class.index);
  si = pool_elt_at_index (sm->interface_pool, hi->hw_instance);

  ASSERT (si->rings[SRP_RING_INNER].hw_if_index == hi->hw_if_index
          || si->rings[SRP_RING_OUTER].hw_if_index == hi->hw_if_index);
  if (ring)
    *ring =
      (hi->hw_if_index == si->rings[SRP_RING_INNER].hw_if_index
       ? SRP_RING_INNER
       : SRP_RING_OUTER);

  return si;
}
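
/* Usage sketch for srp_get_interface (mirrors srp_ips_rx_packet and
   srp_ips_local_request below):

     srp_ring_type_t rx_ring;
     srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
     srp_interface_ring_t * ir = &si->rings[rx_ring];
*/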

static void init_ips_packet (srp_interface_t * si,
                             srp_ring_type_t tx_ring,
                             srp_ips_header_t * i)
{
  memset (i, 0, sizeof (i[0]));

  i->srp.ttl = 1;
  i->srp.is_inner_ring = tx_ring;
  i->srp.priority = 7;
  i->srp.mode = SRP_MODE_control_locally_buffered_for_host;
  srp_header_compute_parity (&i->srp);

  clib_memcpy (&i->ethernet.src_address, &si->my_address, sizeof (si->my_address));
  i->ethernet.type = clib_host_to_net_u16 (ETHERNET_TYPE_SRP_CONTROL);

  /* Checksum will be filled in later. */
  i->control.version = 0;
  i->control.type = SRP_CONTROL_PACKET_TYPE_ips;
  i->control.ttl = 255;

  clib_memcpy (&i->originator_address, &si->my_address, sizeof (si->my_address));
}
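
/*
 * Illustrative only: callers fill in the request-specific fields after
 * init_ips_packet and then hand the header to tx_ips_packet, e.g. (following
 * the pattern used in srp_ips_rx_packet below):
 *
 *   srp_ips_header_t h;
 *   init_ips_packet (si, tx_ring, &h);
 *   h.request_type = SRP_IPS_REQUEST_idle;
 *   h.status = SRP_IPS_STATUS_wrapped;
 *   h.is_long_path = 0;
 *   tx_ips_packet (si, tx_ring, &h);
 */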

static void tx_ips_packet (srp_interface_t * si,
                           srp_ring_type_t tx_ring,
                           srp_ips_header_t * i)
{
  srp_main_t * sm = &srp_main;
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = sm->vlib_main;
  vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, si->rings[tx_ring].hw_if_index);
  vlib_frame_t * f;
  vlib_buffer_t * b;
  u32 * to_next, bi;

  if (! vnet_sw_interface_is_admin_up (vnm, hi->sw_if_index))
    return;
  if (hi->hw_class_index != srp_hw_interface_class.index)
    return;

  i->control.checksum
    = ~ip_csum_fold (ip_incremental_checksum (0, &i->control,
                                              sizeof (i[0]) - STRUCT_OFFSET_OF (srp_ips_header_t, control)));

  bi = vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
                             /* buffer to append to */ 0,
                             i, sizeof (i[0]));

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U",
                  format_vnet_sw_if_index_name, vnm, hi->sw_if_index,
                  format_srp_ips_header, i);

  b = vlib_get_buffer (vm, bi);
  vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = hi->sw_if_index;

  f = vlib_get_frame_to_node (vm, hi->output_node_index);
  to_next = vlib_frame_vector_args (f);
  to_next[0] = bi;
  f->n_vectors = 1;
  vlib_put_frame_to_node (vm, hi->output_node_index, f);
}

static void serialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
{
  srp_interface_t * si = va_arg (*va, srp_interface_t *);
  srp_main_t * sm = &srp_main;
  int r;

  ASSERT (! pool_is_free (sm->interface_pool, si));
  serialize_integer (m, si - sm->interface_pool, sizeof (u32));
  serialize_likely_small_unsigned_integer (m, si->current_ips_state);
  for (r = 0; r < SRP_N_RING; r++)
    {
      srp_interface_ring_t * ir = &si->rings[r];
      void * p;
      serialize_likely_small_unsigned_integer (m, ir->rx_neighbor_address_valid);
      if (ir->rx_neighbor_address_valid)
        {
          p = serialize_get (m, sizeof (ir->rx_neighbor_address));
          clib_memcpy (p, ir->rx_neighbor_address, sizeof (ir->rx_neighbor_address));
        }
      serialize_likely_small_unsigned_integer (m, ir->waiting_to_restore);
      if (ir->waiting_to_restore)
        serialize (m, serialize_f64, ir->wait_to_restore_start_time);
    }
}

static void unserialize_srp_interface_state_msg (serialize_main_t * m, va_list * va)
{
  CLIB_UNUSED (mc_main_t * mcm) = va_arg (*va, mc_main_t *);
  srp_main_t * sm = &srp_main;
  srp_interface_t * si;
  u32 si_index, r;

  unserialize_integer (m, &si_index, sizeof (u32));
  si = pool_elt_at_index (sm->interface_pool, si_index);
  si->current_ips_state = unserialize_likely_small_unsigned_integer (m);
  for (r = 0; r < SRP_N_RING; r++)
    {
      srp_interface_ring_t * ir = &si->rings[r];
      void * p;
      ir->rx_neighbor_address_valid = unserialize_likely_small_unsigned_integer (m);
      if (ir->rx_neighbor_address_valid)
        {
          p = unserialize_get (m, sizeof (ir->rx_neighbor_address));
          clib_memcpy (ir->rx_neighbor_address, p, sizeof (ir->rx_neighbor_address));
        }
      ir->waiting_to_restore = unserialize_likely_small_unsigned_integer (m);
      if (ir->waiting_to_restore)
        unserialize (m, unserialize_f64, &ir->wait_to_restore_start_time);
    }
}

MC_SERIALIZE_MSG (srp_interface_state_msg, static) = {
  .name = "vnet_srp_interface_state",
  .serialize = serialize_srp_interface_state_msg,
  .unserialize = unserialize_srp_interface_state_msg,
};

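/* Only protection requests (forced/manual switch, signal fail, signal
   degrade) trigger a wrap; idle and wait-to-restore requests do not. */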
static int requests_switch (srp_ips_request_type_t r)
{
  static u8 t[16] = {
    [SRP_IPS_REQUEST_forced_switch] = 1,
    [SRP_IPS_REQUEST_manual_switch] = 1,
    [SRP_IPS_REQUEST_signal_fail] = 1,
    [SRP_IPS_REQUEST_signal_degrade] = 1,
  };
  return (int) r < ARRAY_LEN (t) ? t[r] : 0;
}

/* Called when an IPS control packet is received on given interface. */
void srp_ips_rx_packet (u32 sw_if_index, srp_ips_header_t * h)
{
  vnet_main_t * vnm = vnet_get_main();
  vlib_main_t * vm = srp_main.vlib_main;
  srp_ring_type_t rx_ring;
  srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
  srp_interface_ring_t * ir = &si->rings[rx_ring];
  int si_needs_broadcast = 0;

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U %U",
                  format_time_interval, "h:m:s:u", vlib_time_now (vm),
                  format_vnet_sw_if_index_name, vnm, sw_if_index,
                  format_srp_ips_header, h);

  /* Ignore self-generated IPS packets. */
  if (! memcmp (h->originator_address, si->my_address, sizeof (h->originator_address)))
    goto done;

  /* Learn neighbor address from short path messages. */
  if (! h->is_long_path)
    {
      if (ir->rx_neighbor_address_valid
          && memcmp (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address)))
        {
          ASSERT (0);
        }
      ir->rx_neighbor_address_valid = 1;
      clib_memcpy (ir->rx_neighbor_address, h->originator_address, sizeof (ir->rx_neighbor_address));
    }

  switch (si->current_ips_state)
    {
    case SRP_IPS_STATE_idle:
      /* Received {REQ,NEIGHBOR,W,S} in idle state: wrap. */
      if (requests_switch (h->request_type)
          && ! h->is_long_path
          && h->status == SRP_IPS_STATUS_wrapped)
        {
          srp_ips_header_t to_tx[2];

          si_needs_broadcast = 1;
          si->current_ips_state = SRP_IPS_STATE_wrapped;
          si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 1);
          si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 1);

          init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
          to_tx[0].request_type = SRP_IPS_REQUEST_idle;
          to_tx[0].status = SRP_IPS_STATUS_wrapped;
          to_tx[0].is_long_path = 0;
          tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);

          init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
          to_tx[1].request_type = h->request_type;
          to_tx[1].status = SRP_IPS_STATUS_wrapped;
          to_tx[1].is_long_path = 1;
          tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
        }
      break;

    case SRP_IPS_STATE_wrapped:
      if (! h->is_long_path
          && h->request_type == SRP_IPS_REQUEST_idle
          && h->status == SRP_IPS_STATUS_idle)
        {
          si_needs_broadcast = 1;
          si->current_ips_state = SRP_IPS_STATE_idle;
          si->hw_wrap_function (si->rings[SRP_SIDE_A].hw_if_index, /* enable_wrap */ 0);
          si->hw_wrap_function (si->rings[SRP_SIDE_B].hw_if_index, /* enable_wrap */ 0);
        }
      break;

    case SRP_IPS_STATE_pass_thru:
      /* FIXME */
      break;

    default:
      abort ();
      break;
    }

 done:
  if (vm->mc_main && si_needs_broadcast)
    mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
}

/* Perform a local IPS request on the given interface. */
void srp_ips_local_request (u32 sw_if_index, srp_ips_request_type_t request)
{
  vnet_main_t * vnm = vnet_get_main();
  srp_main_t * sm = &srp_main;
  vlib_main_t * vm = sm->vlib_main;
  srp_ring_type_t rx_ring;
  srp_interface_t * si = srp_get_interface (sw_if_index, &rx_ring);
  srp_interface_ring_t * ir = &si->rings[rx_ring];
  int si_needs_broadcast = 0;

  if (request == SRP_IPS_REQUEST_wait_to_restore)
    {
      if (si->current_ips_state != SRP_IPS_STATE_wrapped)
        return;
      if (! ir->waiting_to_restore)
        {
          ir->wait_to_restore_start_time = vlib_time_now (sm->vlib_main);
          ir->waiting_to_restore = 1;
          si_needs_broadcast = 1;
        }
    }
  else
    {
      /* FIXME handle local signal fail. */
      si_needs_broadcast = ir->waiting_to_restore;
      ir->wait_to_restore_start_time = 0;
      ir->waiting_to_restore = 0;
    }

  /* FIXME trace. */
  if (0)
    clib_warning ("%U %U",
                  format_vnet_sw_if_index_name, vnm, sw_if_index,
                  format_srp_ips_request_type, request);

  if (vm->mc_main && si_needs_broadcast)
    mc_serialize (vm->mc_main, &srp_interface_state_msg, si);
}

static void maybe_send_ips_message (srp_interface_t * si)
{
  srp_main_t * sm = &srp_main;
  srp_ips_header_t to_tx[2];
  srp_ring_type_t rx_ring = SRP_RING_OUTER;
  srp_interface_ring_t * r0 = &si->rings[rx_ring ^ 0];
  srp_interface_ring_t * r1 = &si->rings[rx_ring ^ 1];
  f64 now = vlib_time_now (sm->vlib_main);

  if (! si->ips_process_enable)
    return;

  if (si->current_ips_state == SRP_IPS_STATE_wrapped
      && r0->waiting_to_restore
      && r1->waiting_to_restore
      && now >= r0->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay
      && now >= r1->wait_to_restore_start_time + si->config.wait_to_restore_idle_delay)
    {
      si->current_ips_state = SRP_IPS_STATE_idle;
      r0->waiting_to_restore = r1->waiting_to_restore = 0;
      r0->wait_to_restore_start_time = r1->wait_to_restore_start_time = 0;
    }

  if (si->current_ips_state != SRP_IPS_STATE_idle)
    return;

  init_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
  init_ips_packet (si, rx_ring ^ 1, &to_tx[1]);

  if (si->current_ips_state == SRP_IPS_STATE_idle)
    {
      to_tx[0].request_type = to_tx[1].request_type = SRP_IPS_REQUEST_idle;
      to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_idle;
      to_tx[0].is_long_path = to_tx[1].is_long_path = 0;
    }

  else if (si->current_ips_state == SRP_IPS_STATE_wrapped)
    {
      to_tx[0].request_type =
        (si->rings[rx_ring ^ 0].waiting_to_restore
         ? SRP_IPS_REQUEST_wait_to_restore
         : SRP_IPS_REQUEST_signal_fail);
      to_tx[1].request_type =
        (si->rings[rx_ring ^ 1].waiting_to_restore
         ? SRP_IPS_REQUEST_wait_to_restore
         : SRP_IPS_REQUEST_signal_fail);
      to_tx[0].status = to_tx[1].status = SRP_IPS_STATUS_wrapped;
      to_tx[0].is_long_path = 0;
      to_tx[1].is_long_path = 1;
    }

  tx_ips_packet (si, rx_ring ^ 0, &to_tx[0]);
  tx_ips_packet (si, rx_ring ^ 1, &to_tx[1]);
}

static uword
srp_ips_process (vlib_main_t * vm,
                 vlib_node_runtime_t * rt,
                 vlib_frame_t * f)
{
  srp_main_t * sm = &srp_main;
  srp_interface_t * si;

  while (1)
    {
      pool_foreach (si, sm->interface_pool, ({
        maybe_send_ips_message (si);
      }));
      vlib_process_suspend (vm, 1.0);
    }

  return 0;
}

vlib_node_registration_t srp_ips_process_node = {
  .function = srp_ips_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "srp-ips-process",
  .state = VLIB_NODE_STATE_DISABLED,
};

static clib_error_t * srp_init (vlib_main_t * vm)
{
  srp_main_t * sm = &srp_main;

  sm->default_data_ttl = 255;
  sm->vlib_main = vm;
  vlib_register_node (vm, &srp_ips_process_node);
  vlib_register_node (vm, &srp_input_node);
  vlib_register_node (vm, &srp_control_input_node);
  srp_setup_node (vm, srp_input_node.index);

  return 0;
}

VLIB_INIT_FUNCTION (srp_init);