ipsec: esp_decrypt code clean up
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
2  * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/ip/ip46_address.h>
27 #include <vnet/fib/fib_table.h>
28 #include <vnet/adj/adj_mcast.h>
29
30 ip_neighbor_counters_t ip_neighbor_counters[] =
31 {
32  [AF_IP4] = {
33    .ipnc = {
34      [VLIB_RX] = {
35         [IP_NEIGHBOR_CTR_REPLY] = {
36           .name = "arp-rx-replies",
37           .stat_segment_name = "/net/arp/rx/replies",
38         },
39         [IP_NEIGHBOR_CTR_REQUEST] = {
40           .name = "arp-rx-requests",
41           .stat_segment_name = "/net/arp/rx/requests",
42         },
43         [IP_NEIGHBOR_CTR_GRAT] = {
44           .name = "arp-rx-gratuitous",
45           .stat_segment_name = "/net/arp/rx/gratuitous",
46         },
47       },
48       [VLIB_TX] = {
49         [IP_NEIGHBOR_CTR_REPLY] = {
50           .name = "arp-tx-replies",
51           .stat_segment_name = "/net/arp/tx/replies",
52         },
53         [IP_NEIGHBOR_CTR_REQUEST] = {
54           .name = "arp-tx-requests",
55           .stat_segment_name = "/net/arp/tx/requests",
56         },
57         [IP_NEIGHBOR_CTR_GRAT] = {
58           .name = "arp-tx-gratuitous",
59           .stat_segment_name = "/net/arp/tx/gratuitous",
60         },
61       },
62             },
63  },
64  [AF_IP6] = {
65    .ipnc = {
66      [VLIB_RX] = {
67         [IP_NEIGHBOR_CTR_REPLY] = {
68           .name = "ip6-nd-rx-replies",
69           .stat_segment_name = "/net/ip6-nd/rx/replies",
70         },
71         [IP_NEIGHBOR_CTR_REQUEST] = {
72           .name = "ip6-nd-rx-requests",
73           .stat_segment_name = "/net/ip6-nd/rx/requests",
74         },
75         [IP_NEIGHBOR_CTR_GRAT] = {
76           .name = "ip6-nd-rx-gratuitous",
77           .stat_segment_name = "/net/ip6-nd/rx/gratuitous",
78         },
79       },
80       [VLIB_TX] = {
81         [IP_NEIGHBOR_CTR_REPLY] = {
82           .name = "ip6-nd-tx-replies",
83           .stat_segment_name = "/net/ip6-nd/tx/replies",
84         },
85         [IP_NEIGHBOR_CTR_REQUEST] = {
86           .name = "ip6-nd-tx-requests",
87           .stat_segment_name = "/net/ip6-nd/tx/requests",
88         },
89         [IP_NEIGHBOR_CTR_GRAT] = {
90           .name = "ip6-nd-tx-gratuitous",
91           .stat_segment_name = "/net/ip6-nd/tx/gratuitous",
92         },
93       },
94     },
95  },
96 };
97
98 /** Pool for All IP neighbors */
99 static ip_neighbor_t *ip_neighbor_pool;
100
101 /** protocol specific lists of time sorted neighbors */
102 index_t ip_neighbor_list_head[N_AF];
103
104 typedef struct ip_neighbor_elt_t_
105 {
106   clib_llist_anchor_t ipne_anchor;
107   index_t ipne_index;
108 } ip_neighbor_elt_t;
109
110 /** Pool of linked list elemeents */
111 ip_neighbor_elt_t *ip_neighbor_elt_pool;
112
113 typedef struct ip_neighbor_db_t_
114 {
115   /** per interface hash */
116   uword **ipndb_hash;
117   /** per-protocol limit - max number of neighbors*/
118   u32 ipndb_limit;
119   /** max age of a neighbor before it's forcibly evicted */
120   u32 ipndb_age;
121   /** when the limit is reached and new neighbors are created, should
122    * we recycle an old one */
123   bool ipndb_recycle;
124   /** per-protocol number of elements */
125   u32 ipndb_n_elts;
126   /** per-protocol number of elements per-fib-index*/
127   u32 *ipndb_n_elts_per_fib;
128 } ip_neighbor_db_t;
129
130 static vlib_log_class_t ipn_logger;
131
132 /* DBs of neighbours one per AF */
133 static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
134   [AF_IP4] = {
135     .ipndb_limit = 50000,
136     /* Default to not aging and not recycling */
137     .ipndb_age = 0,
138     .ipndb_recycle = false,
139   },
140   [AF_IP6] = {
141     .ipndb_limit = 50000,
142     /* Default to not aging and not recycling */
143     .ipndb_age = 0,
144     .ipndb_recycle = false,
145   }
146 };
147
/*
 * Logging helpers bound to this module's log class.
 *
 * The expansions deliberately carry no trailing semicolon: the caller's own
 * ';' terminates the statement, so the macros can be used as the body of an
 * un-braced if/else without producing an empty extra statement.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
153
154 ip_neighbor_t *
155 ip_neighbor_get (index_t ipni)
156 {
157   if (pool_is_free_index (ip_neighbor_pool, ipni))
158     return (NULL);
159
160   return (pool_elt_at_index (ip_neighbor_pool, ipni));
161 }
162
163 static index_t
164 ip_neighbor_get_index (const ip_neighbor_t * ipn)
165 {
166   return (ipn - ip_neighbor_pool);
167 }
168
169 static void
170 ip_neighbor_touch (ip_neighbor_t * ipn)
171 {
172   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
173 }
174
175 static bool
176 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
177 {
178   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
179 }
180
181 const ip_address_t *
182 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
183 {
184   return (&ipn->ipn_key->ipnk_ip);
185 }
186
187 ip_address_family_t
188 ip_neighbor_get_af (const ip_neighbor_t * ipn)
189 {
190   return (ip_addr_version (&ipn->ipn_key->ipnk_ip));
191 }
192
193 const mac_address_t *
194 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
195 {
196   return (&ipn->ipn_mac);
197 }
198
199 const u32
200 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
201 {
202   return (ipn->ipn_key->ipnk_sw_if_index);
203 }
204
205 static void
206 ip_neighbor_list_remove (ip_neighbor_t * ipn)
207 {
208   /* new neighbours, are added to the head of the list, since the
209    * list is time sorted, newest first */
210   ip_neighbor_elt_t *elt;
211
212   if (~0 != ipn->ipn_elt)
213     {
214       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
215
216       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
217
218       ipn->ipn_elt = ~0;
219     }
220 }
221
222 static void
223 ip_neighbor_refresh (ip_neighbor_t * ipn)
224 {
225   /* new neighbours, are added to the head of the list, since the
226    * list is time sorted, newest first */
227   ip_neighbor_elt_t *elt, *head;
228
229   ip_neighbor_touch (ipn);
230   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
231   ipn->ipn_n_probes = 0;
232
233   if (ip_neighbor_is_dynamic (ipn))
234     {
235       if (~0 == ipn->ipn_elt)
236         /* first time insertion */
237         pool_get_zero (ip_neighbor_elt_pool, elt);
238       else
239         {
240           /* already inserted - extract first */
241           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
242
243           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
244         }
245       head = pool_elt_at_index (ip_neighbor_elt_pool,
246                                 ip_neighbor_list_head[ip_neighbor_get_af
247                                                       (ipn)]);
248
249       elt->ipne_index = ip_neighbor_get_index (ipn);
250       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
251       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
252     }
253 }
254
255 static void
256 ip_neighbor_db_add (const ip_neighbor_t * ipn)
257 {
258   ip_address_family_t af;
259   u32 sw_if_index;
260
261   af = ip_neighbor_get_af (ipn);
262   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
263
264   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
265
266   if (!ip_neighbor_db[af].ipndb_hash[sw_if_index])
267     ip_neighbor_db[af].ipndb_hash[sw_if_index]
268       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
269
270   hash_set_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index],
271                 ipn->ipn_key, ip_neighbor_get_index (ipn));
272
273   ip_neighbor_db[af].ipndb_n_elts++;
274 }
275
276 static void
277 ip_neighbor_db_remove (const ip_neighbor_t * ipn)
278 {
279   ip_address_family_t af;
280   u32 sw_if_index;
281
282   af = ip_neighbor_get_af (ipn);
283   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
284
285   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
286
287   hash_unset_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index], ipn->ipn_key);
288
289   ip_neighbor_db[af].ipndb_n_elts--;
290 }
291
292 static ip_neighbor_t *
293 ip_neighbor_db_find (const ip_neighbor_key_t * key)
294 {
295   ip_address_family_t af;
296   uword *p;
297
298   af = ip_addr_version (&key->ipnk_ip);
299
300   if (key->ipnk_sw_if_index >= vec_len (ip_neighbor_db[af].ipndb_hash))
301     return NULL;
302
303   p = hash_get_mem (ip_neighbor_db[af].ipndb_hash
304                     [key->ipnk_sw_if_index], key);
305
306   if (p)
307     return ip_neighbor_get (p[0]);
308
309   return (NULL);
310 }
311
312 static u8
313 ip_af_type_pfx_len (ip_address_family_t type)
314 {
315   return (type == AF_IP4 ? 32 : 128);
316 }
317
318 static void
319 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
320 {
321   ip_address_family_t af;
322
323   af = ip_neighbor_get_af (ipn);
324
325   if (af == AF_IP6 &&
326       ip6_address_is_link_local_unicast (&ip_addr_v6
327                                          (&ipn->ipn_key->ipnk_ip)))
328     {
329       ip6_ll_prefix_t pfx = {
330         .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
331         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
332       };
333       ipn->ipn_fib_entry_index =
334         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
335     }
336   else
337     {
338       fib_protocol_t fproto;
339
340       fproto = ip_address_family_to_fib_proto (af);
341
342       fib_prefix_t pfx = {
343         .fp_len = ip_af_type_pfx_len (af),
344         .fp_proto = fproto,
345         .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
346       };
347
348       ipn->ipn_fib_entry_index =
349         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
350                                   FIB_ENTRY_FLAG_ATTACHED,
351                                   fib_proto_to_dpo (fproto),
352                                   &pfx.fp_addr,
353                                   ipn->ipn_key->ipnk_sw_if_index,
354                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
355
356       vec_validate (ip_neighbor_db[af].ipndb_n_elts_per_fib, fib_index);
357
358       ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]++;
359
360       if (1 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
361         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
362     }
363 }
364
365 static void
366 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
367 {
368   ip_address_family_t af;
369
370   af = ip_neighbor_get_af (ipn);
371
372   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
373     {
374       if (AF_IP6 == af &&
375           ip6_address_is_link_local_unicast (&ip_addr_v6
376                                              (&ipn->ipn_key->ipnk_ip)))
377         {
378           ip6_ll_prefix_t pfx = {
379             .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
380             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
381           };
382           ip6_ll_table_entry_delete (&pfx);
383         }
384       else
385         {
386           fib_protocol_t fproto;
387
388           fproto = ip_address_family_to_fib_proto (af);
389
390           fib_prefix_t pfx = {
391             .fp_len = ip_af_type_pfx_len (af),
392             .fp_proto = fproto,
393             .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
394           };
395
396           fib_table_entry_path_remove (fib_index,
397                                        &pfx,
398                                        FIB_SOURCE_ADJ,
399                                        fib_proto_to_dpo (fproto),
400                                        &pfx.fp_addr,
401                                        ipn->ipn_key->ipnk_sw_if_index,
402                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
403
404           ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]--;
405
406           if (0 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
407             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
408         }
409     }
410 }
411
412 static void
413 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
414 {
415   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
416                           ethernet_build_rewrite (vnet_get_main (),
417                                                   ipn->
418                                                   ipn_key->ipnk_sw_if_index,
419                                                   adj_get_link_type (ai),
420                                                   ipn->ipn_mac.bytes));
421 }
422
423 static void
424 ip_neighbor_mk_incomplete (adj_index_t ai)
425 {
426   ip_adjacency_t *adj = adj_get (ai);
427
428   adj_nbr_update_rewrite (ai,
429                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
430                           ethernet_build_rewrite (vnet_get_main (),
431                                                   adj->
432                                                   rewrite_header.sw_if_index,
433                                                   VNET_LINK_ARP,
434                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
435 }
436
437 static adj_walk_rc_t
438 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
439 {
440   ip_neighbor_t *ipn = ctx;
441
442   ip_neighbor_mk_complete (ai, ipn);
443
444   return (ADJ_WALK_RC_CONTINUE);
445 }
446
447 static adj_walk_rc_t
448 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
449 {
450   ip_neighbor_mk_incomplete (ai);
451
452   return (ADJ_WALK_RC_CONTINUE);
453 }
454
455 static void
456 ip_neighbor_destroy (ip_neighbor_t * ipn)
457 {
458   ip_address_family_t af;
459
460   af = ip_neighbor_get_af (ipn);
461
462   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
463                    ip_neighbor_get_index (ipn));
464
465   ip_neighbor_publish (ip_neighbor_get_index (ipn),
466                        IP_NEIGHBOR_EVENT_REMOVED);
467
468   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
469                    ip_address_family_to_fib_proto (af),
470                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
471                    ip_neighbor_mk_incomplete_walk, ipn);
472   ip_neighbor_adj_fib_remove
473     (ipn,
474      fib_table_get_index_for_sw_if_index
475      (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
476
477   ip_neighbor_list_remove (ipn);
478   ip_neighbor_db_remove (ipn);
479   clib_mem_free (ipn->ipn_key);
480
481   pool_put (ip_neighbor_pool, ipn);
482 }
483
484 static bool
485 ip_neighbor_force_reuse (ip_address_family_t af)
486 {
487   if (!ip_neighbor_db[af].ipndb_recycle)
488     return false;
489
490   /* pluck the oldest entry, which is the one from the end of the list */
491   ip_neighbor_elt_t *elt, *head;
492
493   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
494
495   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
496     return (false);
497
498   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
499   ip_neighbor_destroy (ip_neighbor_get (elt->ipne_index));
500
501   return (true);
502 }
503
504 static ip_neighbor_t *
505 ip_neighbor_alloc (const ip_neighbor_key_t * key,
506                    const mac_address_t * mac, ip_neighbor_flags_t flags)
507 {
508   ip_address_family_t af;
509   ip_neighbor_t *ipn;
510
511   af = ip_addr_version (&key->ipnk_ip);
512
513   if (ip_neighbor_db[af].ipndb_limit &&
514       (ip_neighbor_db[af].ipndb_n_elts >= ip_neighbor_db[af].ipndb_limit))
515     {
516       if (!ip_neighbor_force_reuse (af))
517         return (NULL);
518     }
519
520   pool_get_zero (ip_neighbor_pool, ipn);
521
522   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
523   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
524
525   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
526   ipn->ipn_flags = flags;
527   ipn->ipn_elt = ~0;
528
529   mac_address_copy (&ipn->ipn_mac, mac);
530
531   ip_neighbor_db_add (ipn);
532
533   /* create the adj-fib. the entry in the FIB table for the peer's interface */
534   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
535     ip_neighbor_adj_fib_add
536       (ipn, fib_table_get_index_for_sw_if_index
537        (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
538
539   return (ipn);
540 }
541
542 int
543 ip_neighbor_add (const ip_address_t * ip,
544                  const mac_address_t * mac,
545                  u32 sw_if_index,
546                  ip_neighbor_flags_t flags, u32 * stats_index)
547 {
548   fib_protocol_t fproto;
549   ip_neighbor_t *ipn;
550
551   /* main thread only */
552   ASSERT (0 == vlib_get_thread_index ());
553
554   fproto = ip_address_family_to_fib_proto (ip_addr_version (ip));
555
556   const ip_neighbor_key_t key = {
557     .ipnk_ip = *ip,
558     .ipnk_sw_if_index = sw_if_index,
559   };
560
561   ipn = ip_neighbor_db_find (&key);
562
563   if (ipn)
564     {
565       IP_NEIGHBOR_DBG ("update: %U, %U",
566                        format_vnet_sw_if_index_name, vnet_get_main (),
567                        sw_if_index, format_ip_address, ip,
568                        format_ip_neighbor_flags, flags, format_mac_address_t,
569                        mac);
570
571       ip_neighbor_touch (ipn);
572
573       /* Refuse to over-write static neighbor entry. */
574       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
575           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
576         {
577           /* if MAC address match, still check to send event */
578           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
579             goto check_customers;
580           return -2;
581         }
582
583       /* A dynamic entry can become static, but not vice-versa.
584        * i.e. since if it was programmed by the CP then it must
585        * be removed by the CP */
586       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
587           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
588         {
589           ip_neighbor_list_remove (ipn);
590           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
591           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
592         }
593
594       /*
595        * prevent a DoS attack from the data-plane that
596        * spams us with no-op updates to the MAC address
597        */
598       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
599         {
600           ip_neighbor_refresh (ipn);
601           goto check_customers;
602         }
603
604       mac_address_copy (&ipn->ipn_mac, mac);
605     }
606   else
607     {
608       IP_NEIGHBOR_INFO ("add: %U, %U",
609                         format_vnet_sw_if_index_name, vnet_get_main (),
610                         sw_if_index, format_ip_address, ip,
611                         format_ip_neighbor_flags, flags, format_mac_address_t,
612                         mac);
613
614       ipn = ip_neighbor_alloc (&key, mac, flags);
615
616       if (NULL == ipn)
617         return VNET_API_ERROR_LIMIT_EXCEEDED;
618     }
619
620   /* Update time stamp and flags. */
621   ip_neighbor_refresh (ipn);
622
623   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
624                    fproto, &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
625                    ip_neighbor_mk_complete_walk, ipn);
626
627 check_customers:
628   /* Customer(s) requesting event for this address? */
629   ip_neighbor_publish (ip_neighbor_get_index (ipn), IP_NEIGHBOR_EVENT_ADDED);
630
631   if (stats_index)
632     *stats_index = adj_nbr_find (fproto,
633                                  fib_proto_to_link (fproto),
634                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
635                                  ipn->ipn_key->ipnk_sw_if_index);
636   return 0;
637 }
638
639 int
640 ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index)
641 {
642   ip_neighbor_t *ipn;
643
644   /* main thread only */
645   ASSERT (0 == vlib_get_thread_index ());
646
647   IP_NEIGHBOR_INFO ("delete: %U, %U",
648                     format_vnet_sw_if_index_name, vnet_get_main (),
649                     sw_if_index, format_ip_address, ip);
650
651   const ip_neighbor_key_t key = {
652     .ipnk_ip = *ip,
653     .ipnk_sw_if_index = sw_if_index,
654   };
655
656   ipn = ip_neighbor_db_find (&key);
657
658   if (NULL == ipn)
659     return (VNET_API_ERROR_NO_SUCH_ENTRY);
660
661   ip_neighbor_destroy (ipn);
662
663   return (0);
664 }
665
666 typedef struct ip_neighbor_del_all_ctx_t_
667 {
668   index_t *ipn_del;
669 } ip_neighbor_del_all_ctx_t;
670
671 static walk_rc_t
672 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
673 {
674   ip_neighbor_del_all_ctx_t *ctx = arg;
675
676   vec_add1 (ctx->ipn_del, ipni);
677
678   return (WALK_CONTINUE);
679 }
680
681 void
682 ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index)
683 {
684   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
685                     format_ip_address_family, af,
686                     format_vnet_sw_if_index_name, vnet_get_main (),
687                     sw_if_index);
688
689   ip_neighbor_del_all_ctx_t ctx = {
690     .ipn_del = NULL,
691   };
692   index_t *ipni;
693
694   ip_neighbor_walk (af, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
695
696   vec_foreach (ipni,
697                ctx.ipn_del) ip_neighbor_destroy (ip_neighbor_get (*ipni));
698   vec_free (ctx.ipn_del);
699 }
700
701 void
702 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
703 {
704   ip_neighbor_t *ipn;
705   ip_adjacency_t *adj;
706
707   adj = adj_get (ai);
708
709   ip_neighbor_key_t key = {
710     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
711   };
712
713   ip_address_from_46 (&adj->sub_type.nbr.next_hop,
714                       adj->ia_nh_proto, &key.ipnk_ip);
715
716   ipn = ip_neighbor_db_find (&key);
717
718   switch (adj->lookup_next_index)
719     {
720     case IP_LOOKUP_NEXT_ARP:
721       if (NULL != ipn)
722         {
723           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
724                            adj->ia_nh_proto,
725                            &adj->sub_type.nbr.next_hop,
726                            ip_neighbor_mk_complete_walk, ipn);
727         }
728       else
729         {
730           /*
731            * no matching ARP entry.
732            * construct the rewrite required to for an ARP packet, and stick
733            * that in the adj's pipe to smoke.
734            */
735           adj_nbr_update_rewrite
736             (ai,
737              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
738              ethernet_build_rewrite
739              (vnm,
740               adj->rewrite_header.sw_if_index,
741               VNET_LINK_ARP,
742               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
743
744           /*
745            * since the FIB has added this adj for a route, it makes sense it
746            * may want to forward traffic sometime soon. Let's send a
747            * speculative ARP. just one. If we were to do periodically that
748            * wouldn't be bad either, but that's more code than i'm prepared to
749            * write at this time for relatively little reward.
750            */
751           /*
752            * adj_nbr_update_rewrite may actually call fib_walk_sync.
753            * fib_walk_sync may allocate a new adjacency and potentially cause
754            * a realloc for adj_pool. When that happens, adj pointer is no
755            * longer valid here.x We refresh adj pointer accordingly.
756            */
757           adj = adj_get (ai);
758           ip_neighbor_probe (adj);
759         }
760       break;
761     case IP_LOOKUP_NEXT_REWRITE:
762       /* Update of an existing rewrite adjacency happens e.g. when the
763        * interface's MAC address changes */
764       if (NULL != ipn)
765         ip_neighbor_mk_complete (ai, ipn);
766       break;
767     case IP_LOOKUP_NEXT_GLEAN:
768     case IP_LOOKUP_NEXT_BCAST:
769     case IP_LOOKUP_NEXT_MCAST:
770     case IP_LOOKUP_NEXT_DROP:
771     case IP_LOOKUP_NEXT_PUNT:
772     case IP_LOOKUP_NEXT_LOCAL:
773     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
774     case IP_LOOKUP_NEXT_MIDCHAIN:
775     case IP_LOOKUP_NEXT_ICMP_ERROR:
776     case IP_LOOKUP_N_NEXT:
777       ASSERT (0);
778       break;
779     }
780 }
781
782 void
783 ip_neighbor_learn (const ip_neighbor_learn_t * l)
784 {
785   ip_neighbor_add (&l->ip, &l->mac, l->sw_if_index,
786                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
787 }
788
789 static clib_error_t *
790 ip_neighbor_cmd (vlib_main_t * vm,
791                  unformat_input_t * input, vlib_cli_command_t * cmd)
792 {
793   ip_address_t ip = IP_ADDRESS_V6_ALL_0S;
794   mac_address_t mac = ZERO_MAC_ADDRESS;
795   vnet_main_t *vnm = vnet_get_main ();
796   ip_neighbor_flags_t flags;
797   u32 sw_if_index = ~0;
798   int is_add = 1, is_flush = 0;
799   int count = 1;
800
801   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
802
803   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
804     {
805       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
806       if (unformat (input, "%U %U %U",
807                     unformat_vnet_sw_interface, vnm, &sw_if_index,
808                     unformat_ip_address, &ip, unformat_mac_address_t, &mac))
809         ;
810       else if (unformat (input, "delete") || unformat (input, "del"))
811         is_add = 0;
812       else if (unformat (input, "flush"))
813         is_flush = 1;
814       else if (unformat (input, "static"))
815         {
816           flags |= IP_NEIGHBOR_FLAG_STATIC;
817           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
818         }
819       else if (unformat (input, "no-fib-entry"))
820         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
821       else if (unformat (input, "count %d", &count))
822         ;
823       else
824         break;
825     }
826
827   if (is_flush)
828     {
829       ip_neighbor_del_all (AF_IP4, sw_if_index);
830       ip_neighbor_del_all (AF_IP6, sw_if_index);
831       return NULL;
832     }
833
834   if (sw_if_index == ~0 ||
835       ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
836     return clib_error_return (0,
837                               "specify interface, IP address and MAC: `%U'",
838                               format_unformat_error, input);
839
840   while (count)
841     {
842       if (is_add)
843         ip_neighbor_add (&ip, &mac, sw_if_index, flags, NULL);
844       else
845         ip_neighbor_del (&ip, sw_if_index);
846
847       ip_address_increment (&ip);
848       mac_address_increment (&mac);
849
850       --count;
851     }
852
853   return NULL;
854 }
855
856 /*?
857  * Add or delete IP neighbor (IPv4 ARP or IPv6 ND) cache entries.
858  *
859  * @note 'set ip neighbor' options (e.g. delete, static,
860  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
861  * any order and combination.
862  *
863  * @cliexpar
864  * @parblock
865  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
866  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
867  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
868  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
869  * de:ad:be:ef:ba:be}
870  *
871  * To add or delete an IPv4 ARP cache entry
872  * table:
873  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
874  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
875  * dead.beef.babe}
876  *
877  * Add or delete IPv4 static ARP cache entries as follows:
878  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3
879  * dead.beef.babe}
880  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3
881  * dead.beef.babe}
882  *
883  * For testing / debugging purposes, the 'set ip neighbor' command can add or
884  * delete multiple entries. Supply the 'count N' parameter:
885  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3
886  * dead.beef.babe}
887  * @endparblock
888  ?*/
889 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
890   .path = "set ip neighbor",
891   .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> "
892                 "[static] [no-fib-entry] [count <count>]",
893   .function = ip_neighbor_cmd,
894 };
895 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
896   .path = "ip neighbor",
897   .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> "
898                 "[static] [no-fib-entry] [count <count>]",
899   .function = ip_neighbor_cmd,
900 };
901
902 static int
903 ip_neighbor_sort (void *a1, void *a2)
904 {
905   index_t *ipni1 = a1, *ipni2 = a2;
906   ip_neighbor_t *ipn1, *ipn2;
907   int cmp;
908
909   ipn1 = ip_neighbor_get (*ipni1);
910   ipn2 = ip_neighbor_get (*ipni2);
911
912   cmp = vnet_sw_interface_compare (vnet_get_main (),
913                                    ipn1->ipn_key->ipnk_sw_if_index,
914                                    ipn2->ipn_key->ipnk_sw_if_index);
915   if (!cmp)
916     cmp = ip_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
917   return cmp;
918 }
919
920 static index_t *
921 ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
922 {
923   index_t *ipnis = NULL;
924   ip_neighbor_t *ipn;
925
926   pool_foreach (ipn, ip_neighbor_pool)
927    {
928     if ((sw_if_index == ~0 ||
929         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
930         (N_AF == af ||
931          ip_neighbor_get_af(ipn) == af))
932        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
933   }
934
935
936   if (ipnis)
937     vec_sort_with_function (ipnis, ip_neighbor_sort);
938   return ipnis;
939 }
940
941 static clib_error_t *
942 ip_neighbor_show_sorted_i (vlib_main_t * vm,
943                            unformat_input_t * input,
944                            vlib_cli_command_t * cmd, ip_address_family_t af)
945 {
946   ip_neighbor_elt_t *elt, *head;
947
948   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
949
950
951   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
952                    "Flags", "Ethernet", "Interface");
953
954   /* the list is time sorted, newest first, so start from the back
955    * and work forwards. Stop when we get to one that is alive */
956   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
957                              ipne_anchor, head, elt,
958   ({
959     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
960   }));
961
962   return (NULL);
963 }
964
965 static clib_error_t *
966 ip_neighbor_show_i (vlib_main_t * vm,
967                     unformat_input_t * input,
968                     vlib_cli_command_t * cmd, ip_address_family_t af)
969 {
970   index_t *ipni, *ipnis = NULL;
971   u32 sw_if_index;
972
973   /* Filter entries by interface if given. */
974   sw_if_index = ~0;
975   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
976                         &sw_if_index);
977
978   ipnis = ip_neighbor_entries (sw_if_index, af);
979
980   if (ipnis)
981     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
982                      "Flags", "Ethernet", "Interface");
983
984   vec_foreach (ipni, ipnis)
985   {
986     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
987   }
988   vec_free (ipnis);
989
990   return (NULL);
991 }
992
993 static clib_error_t *
994 ip_neighbor_show (vlib_main_t * vm,
995                   unformat_input_t * input, vlib_cli_command_t * cmd)
996 {
997   return (ip_neighbor_show_i (vm, input, cmd, N_AF));
998 }
999
1000 static clib_error_t *
1001 ip6_neighbor_show (vlib_main_t * vm,
1002                    unformat_input_t * input, vlib_cli_command_t * cmd)
1003 {
1004   return (ip_neighbor_show_i (vm, input, cmd, AF_IP6));
1005 }
1006
1007 static clib_error_t *
1008 ip4_neighbor_show (vlib_main_t * vm,
1009                    unformat_input_t * input, vlib_cli_command_t * cmd)
1010 {
1011   return (ip_neighbor_show_i (vm, input, cmd, AF_IP4));
1012 }
1013
1014 static clib_error_t *
1015 ip6_neighbor_show_sorted (vlib_main_t * vm,
1016                           unformat_input_t * input, vlib_cli_command_t * cmd)
1017 {
1018   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP6));
1019 }
1020
1021 static clib_error_t *
1022 ip4_neighbor_show_sorted (vlib_main_t * vm,
1023                           unformat_input_t * input, vlib_cli_command_t * cmd)
1024 {
1025   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP4));
1026 }
1027
1028 /*?
 * Display the IP neighbor entries, optionally restricted to the entries
 * learned on a given interface.
 *
 * @cliexpar
 * Example of how to display the neighbor table (IPv4 entries shown):
1033  * @cliexstart{show ip neighbor}
1034  *    Time      FIB        IP4       Flags      Ethernet              Interface
1035  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1036  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1037  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1038  * Proxy arps enabled for:
1039  * Fib_index 0   6.0.0.1 - 6.0.0.11
1040  * @cliexend
1041  ?*/
1042 VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
1043   .path = "show ip neighbors",
1044   .function = ip_neighbor_show,
1045   .short_help = "show ip neighbors [interface]",
1046 };
1047 VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
1048   .path = "show ip4 neighbors",
1049   .function = ip4_neighbor_show,
1050   .short_help = "show ip4 neighbors [interface]",
1051 };
1052 VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
1053   .path = "show ip6 neighbors",
1054   .function = ip6_neighbor_show,
1055   .short_help = "show ip6 neighbors [interface]",
1056 };
1057 VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
1058   .path = "show ip neighbor",
1059   .function = ip_neighbor_show,
1060   .short_help = "show ip neighbor [interface]",
1061 };
1062 VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
1063   .path = "show ip4 neighbor",
1064   .function = ip4_neighbor_show,
1065   .short_help = "show ip4 neighbor [interface]",
1066 };
1067 VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
1068   .path = "show ip6 neighbor",
1069   .function = ip6_neighbor_show,
1070   .short_help = "show ip6 neighbor [interface]",
1071 };
1072 VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
1073   .path = "show ip4 neighbor-sorted",
1074   .function = ip4_neighbor_show_sorted,
1075   .short_help = "show ip4 neighbor-sorted",
1076 };
1077 VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
1078   .path = "show ip6 neighbor-sorted",
1079   .function = ip6_neighbor_show_sorted,
1080   .short_help = "show ip6 neighbor-sorted",
1081 };
1082
/* Per address family virtual function table, filled in by
 * ip_neighbor_register() and consulted by the proxy add/del/enable/disable
 * wrappers below. Zero initialised, so an unregistered hook is NULL. */
static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
1084
1085 void
1086 ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
1087 {
1088   ip_nbr_vfts[af] = *vft;
1089 }
1090
1091 void
1092 ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
1093                        ip_address_family_t af, const ip46_address_t *dst)
1094 {
1095   if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
1096     return;
1097
1098   switch (af)
1099     {
1100     case AF_IP6:
1101       ip6_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip6);
1102       break;
1103     case AF_IP4:
1104       ip4_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip4);
1105       break;
1106     }
1107 }
1108
1109 void
1110 ip_neighbor_probe (const ip_adjacency_t * adj)
1111 {
1112   ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
1113                          vlib_get_thread_index (),
1114                          ip_address_family_from_fib_proto (adj->ia_nh_proto),
1115                          &adj->sub_type.nbr.next_hop);
1116 }
1117
1118 void
1119 ip_neighbor_walk (ip_address_family_t af,
1120                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1121 {
1122   ip_neighbor_key_t *key;
1123   index_t ipni;
1124
1125   if (~0 == sw_if_index)
1126     {
1127       uword **hash;
1128
1129       vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
1130       {
1131           hash_foreach (key, ipni, *hash,
1132           ({
1133             if (WALK_STOP == cb (ipni, ctx))
1134               break;
1135           }));
1136       }
1137     }
1138   else
1139     {
1140       uword *hash;
1141
1142       if (vec_len (ip_neighbor_db[af].ipndb_hash) <= sw_if_index)
1143         return;
1144       hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
1145
1146       hash_foreach (key, ipni, hash,
1147       ({
1148         if (WALK_STOP == cb (ipni, ctx))
1149           break;
1150       }));
1151     }
1152 }
1153
1154 int
1155 ip4_neighbor_proxy_add (u32 fib_index,
1156                         const ip4_address_t * start,
1157                         const ip4_address_t * end)
1158 {
1159   if (ip_nbr_vfts[AF_IP4].inv_proxy4_add)
1160     {
1161       return (ip_nbr_vfts[AF_IP4].inv_proxy4_add (fib_index, start, end));
1162     }
1163
1164   return (-1);
1165 }
1166
1167 int
1168 ip4_neighbor_proxy_delete (u32 fib_index,
1169                            const ip4_address_t * start,
1170                            const ip4_address_t * end)
1171 {
1172   if (ip_nbr_vfts[AF_IP4].inv_proxy4_del)
1173     {
1174       return (ip_nbr_vfts[AF_IP4].inv_proxy4_del (fib_index, start, end));
1175     }
1176   return -1;
1177 }
1178
1179 int
1180 ip4_neighbor_proxy_enable (u32 sw_if_index)
1181 {
1182   if (ip_nbr_vfts[AF_IP4].inv_proxy4_enable)
1183     {
1184       return (ip_nbr_vfts[AF_IP4].inv_proxy4_enable (sw_if_index));
1185     }
1186   return -1;
1187 }
1188
1189 int
1190 ip4_neighbor_proxy_disable (u32 sw_if_index)
1191 {
1192   if (ip_nbr_vfts[AF_IP4].inv_proxy4_disable)
1193     {
1194       return (ip_nbr_vfts[AF_IP4].inv_proxy4_disable (sw_if_index));
1195     }
1196   return -1;
1197 }
1198
1199 int
1200 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1201 {
1202   if (ip_nbr_vfts[AF_IP6].inv_proxy6_add)
1203     {
1204       return (ip_nbr_vfts[AF_IP6].inv_proxy6_add (sw_if_index, addr));
1205     }
1206   return -1;
1207 }
1208
1209 int
1210 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1211 {
1212   if (ip_nbr_vfts[AF_IP6].inv_proxy6_del)
1213     {
1214       return (ip_nbr_vfts[AF_IP6].inv_proxy6_del (sw_if_index, addr));
1215     }
1216   return -1;
1217 }
1218
1219 void
1220 ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
1221 {
1222   index_t *ipnis = NULL, *ipni;
1223   ip_neighbor_t *ipn;
1224
1225   IP_NEIGHBOR_DBG ("populate: %U %U",
1226                    format_vnet_sw_if_index_name, vnet_get_main (),
1227                    sw_if_index, format_ip_address_family, af);
1228
1229   pool_foreach (ipn, ip_neighbor_pool)
1230    {
1231     if (ip_neighbor_get_af(ipn) == af &&
1232         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1233       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1234   }
1235
1236   vec_foreach (ipni, ipnis)
1237   {
1238     ipn = ip_neighbor_get (*ipni);
1239
1240     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1241                      ip_address_family_to_fib_proto (ip_neighbor_get_af
1242                                                      (ipn)),
1243                      &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
1244                      ip_neighbor_mk_complete_walk, ipn);
1245   }
1246   vec_free (ipnis);
1247 }
1248
1249 void
1250 ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
1251 {
1252   index_t *ipnis = NULL, *ipni;
1253   ip_neighbor_t *ipn;
1254
1255
1256   IP_NEIGHBOR_DBG ("flush: %U %U",
1257                    format_vnet_sw_if_index_name, vnet_get_main (),
1258                    sw_if_index, format_ip_address_family, af);
1259
1260   pool_foreach (ipn, ip_neighbor_pool)
1261    {
1262     if (ip_neighbor_get_af(ipn) == af &&
1263         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1264         ip_neighbor_is_dynamic (ipn))
1265       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1266   }
1267
1268   vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
1269   vec_free (ipnis);
1270 }
1271
1272 walk_rc_t
1273 ip_neighbor_mark_one (index_t ipni, void *ctx)
1274 {
1275   ip_neighbor_t *ipn;
1276
1277   ipn = ip_neighbor_get (ipni);
1278
1279   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1280
1281   return (WALK_CONTINUE);
1282 }
1283
1284 void
1285 ip_neighbor_mark (ip_address_family_t af)
1286 {
1287   ip_neighbor_walk (af, ~0, ip_neighbor_mark_one, NULL);
1288 }
1289
/* Walk context used by ip_neighbor_sweep() */
typedef struct ip_neighbor_sweep_ctx_t_
{
  /* vector of the indices of the neighbors found with the stale flag set */
  index_t *ipnsc_stale;
} ip_neighbor_sweep_ctx_t;
1294
1295 static walk_rc_t
1296 ip_neighbor_sweep_one (index_t ipni, void *arg)
1297 {
1298   ip_neighbor_sweep_ctx_t *ctx = arg;
1299   ip_neighbor_t *ipn;
1300
1301   ipn = ip_neighbor_get (ipni);
1302
1303   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1304     {
1305       vec_add1 (ctx->ipnsc_stale, ipni);
1306     }
1307
1308   return (WALK_CONTINUE);
1309 }
1310
1311 void
1312 ip_neighbor_sweep (ip_address_family_t af)
1313 {
1314   ip_neighbor_sweep_ctx_t ctx = { };
1315   index_t *ipni;
1316
1317   ip_neighbor_walk (af, ~0, ip_neighbor_sweep_one, &ctx);
1318
1319   vec_foreach (ipni, ctx.ipnsc_stale)
1320   {
1321     ip_neighbor_destroy (ip_neighbor_get (*ipni));
1322   }
1323   vec_free (ctx.ipnsc_stale);
1324 }
1325
1326 /*
1327  * Remove any arp entries associated with the specified interface
1328  */
1329 static clib_error_t *
1330 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1331                                     u32 sw_if_index, u32 flags)
1332 {
1333   ip_address_family_t af;
1334
1335   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1336                    format_vnet_sw_if_index_name, vnet_get_main (),
1337                    sw_if_index,
1338                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1339
1340   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1341     {
1342       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_populate (af, sw_if_index);
1343     }
1344   else
1345     {
1346       /* admin down, flush all neighbours */
1347       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1348     }
1349
1350   return (NULL);
1351 }
1352
1353 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1354
1355 /*
1356  * Remove any arp entries associated with the specified interface
1357  */
1358 static clib_error_t *
1359 ip_neighbor_add_del_sw_interface (vnet_main_t *vnm, u32 sw_if_index,
1360                                   u32 is_add)
1361 {
1362   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1363                    format_vnet_sw_if_index_name, vnet_get_main (),
1364                    sw_if_index, (is_add ? "add" : "del"));
1365
1366   if (!is_add && sw_if_index != ~0)
1367     {
1368       ip_address_family_t af;
1369
1370       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1371     }
1372
1373   if (is_add)
1374     {
1375       ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP4], sw_if_index);
1376       ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP6], sw_if_index);
1377     }
1378
1379   return (NULL);
1380 }
1381
1382 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_add_del_sw_interface);
1383
/* Walk context used to find the dynamic neighbors covered by a prefix */
typedef struct ip_neighbor_walk_covered_ctx_t_
{
  /* the prefix's address; its version selects the v4 or v6 comparison */
  ip_address_t addr;
  /* the prefix length */
  u32 length;
  /* result: vector of the indices of the covered dynamic neighbors */
  index_t *ipnis;
} ip_neighbor_walk_covered_ctx_t;
1390
1391 static walk_rc_t
1392 ip_neighbor_walk_covered (index_t ipni, void *arg)
1393 {
1394   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1395   ip_neighbor_t *ipn;
1396
1397   ipn = ip_neighbor_get (ipni);
1398
1399   if (AF_IP4 == ip_addr_version (&ctx->addr))
1400     {
1401       if (ip4_destination_matches_route (&ip4_main,
1402                                          &ip_addr_v4 (&ipn->ipn_key->ipnk_ip),
1403                                          &ip_addr_v4 (&ctx->addr),
1404                                          ctx->length) &&
1405           ip_neighbor_is_dynamic (ipn))
1406         {
1407           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1408         }
1409     }
1410   else if (AF_IP6 == ip_addr_version (&ctx->addr))
1411     {
1412       if (ip6_destination_matches_route (&ip6_main,
1413                                          &ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
1414                                          &ip_addr_v6 (&ctx->addr),
1415                                          ctx->length) &&
1416           ip_neighbor_is_dynamic (ipn))
1417         {
1418           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1419         }
1420     }
1421   return (WALK_CONTINUE);
1422 }
1423
1424
1425 /*
1426  * callback when an interface address is added or deleted
1427  */
1428 static void
1429 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1430                                           uword opaque,
1431                                           u32 sw_if_index,
1432                                           ip4_address_t * address,
1433                                           u32 address_length,
1434                                           u32 if_address_index, u32 is_del)
1435 {
1436   /*
1437    * Flush the ARP cache of all entries covered by the address
1438    * that is being removed.
1439    */
1440   IP_NEIGHBOR_DBG ("addr-%s: %U, %U/%d", (is_del ? "del" : "add"),
1441                    format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index,
1442                    format_ip4_address, address, address_length);
1443
1444   if (is_del)
1445     {
1446       ip_neighbor_walk_covered_ctx_t ctx = {
1447         .addr = {
1448           .ip.ip4 = *address,
1449           .version = AF_IP4,
1450         },
1451         .length = address_length,
1452       };
1453       index_t *ipni;
1454
1455       ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
1456
1457       vec_foreach (ipni, ctx.ipnis)
1458         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1459
1460       vec_free (ctx.ipnis);
1461     }
1462 }
1463
1464 /*
1465  * callback when an interface address is added or deleted
1466  */
1467 static void
1468 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1469                                           uword opaque,
1470                                           u32 sw_if_index,
1471                                           ip6_address_t * address,
1472                                           u32 address_length,
1473                                           u32 if_address_index, u32 is_del)
1474 {
1475   /*
1476    * Flush the ARP cache of all entries covered by the address
1477    * that is being removed.
1478    */
1479   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1480                    format_vnet_sw_if_index_name, vnet_get_main (),
1481                    sw_if_index, format_ip6_address, address, address_length,
1482                    (is_del ? "del" : "add"));
1483
1484   if (is_del)
1485     {
1486       ip_neighbor_walk_covered_ctx_t ctx = {
1487         .addr = {
1488           .ip.ip6 = *address,
1489           .version = AF_IP6,
1490         },
1491         .length = address_length,
1492       };
1493       index_t *ipni;
1494
1495       ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
1496
1497       vec_foreach (ipni, ctx.ipnis)
1498         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1499
1500       vec_free (ctx.ipnis);
1501     }
1502 }
1503
/* Walk context used when an interface is bound to a different FIB table */
typedef struct ip_neighbor_table_bind_ctx_t_
{
  /* FIB index the neighbor's adj-fib is added to */
  u32 new_fib_index;
  /* FIB index the neighbor's adj-fib is removed from */
  u32 old_fib_index;
} ip_neighbor_table_bind_ctx_t;
1509
1510 static walk_rc_t
1511 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1512 {
1513   ip_neighbor_table_bind_ctx_t *ctx = arg;
1514   ip_neighbor_t *ipn;
1515
1516   ipn = ip_neighbor_get (ipni);
1517   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1518   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1519
1520   return (WALK_CONTINUE);
1521 }
1522
1523 static void
1524 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1525                            uword opaque,
1526                            u32 sw_if_index,
1527                            u32 new_fib_index, u32 old_fib_index)
1528 {
1529   ip_neighbor_table_bind_ctx_t ctx = {
1530     .old_fib_index = old_fib_index,
1531     .new_fib_index = new_fib_index,
1532   };
1533
1534   ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1535 }
1536
1537 static void
1538 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1539                            uword opaque,
1540                            u32 sw_if_index,
1541                            u32 new_fib_index, u32 old_fib_index)
1542 {
1543   ip_neighbor_table_bind_ctx_t ctx = {
1544     .old_fib_index = old_fib_index,
1545     .new_fib_index = new_fib_index,
1546   };
1547
1548   ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1549 }
1550
/* Verdict returned by ip_neighbour_age_out() for one neighbor */
typedef enum ip_neighbor_age_state_t_
{
  /* last update is no older than the configured age */
  IP_NEIGHBOR_AGE_ALIVE,
  /* older than the configured age; a probe has just been sent */
  IP_NEIGHBOR_AGE_PROBE,
  /* older than the configured age and already probed more than twice */
  IP_NEIGHBOR_AGE_DEAD,
} ip_neighbor_age_state_t;
1557
/* Timeout value for which the age process waits for an event only, with
 * no clock (see the !timeout test in ip_neighbor_age_loop) */
#define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1559
1560 static ip_neighbor_age_state_t
1561 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1562 {
1563   ip_address_family_t af;
1564   ip_neighbor_t *ipn;
1565   u32 ipndb_age;
1566   u32 ttl;
1567
1568   ipn = ip_neighbor_get (ipni);
1569   af = ip_neighbor_get_af (ipn);
1570   ipndb_age = ip_neighbor_db[af].ipndb_age;
1571   ttl = now - ipn->ipn_time_last_updated;
1572   *wait = ipndb_age;
1573
1574   if (ttl > ipndb_age)
1575     {
1576       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1577                        format_ip_neighbor, ipni, now,
1578                        ipn->ipn_time_last_updated, ipndb_age);
1579       if (ipn->ipn_n_probes > 2)
1580         {
1581           /* 3 strikes and yea-re out */
1582           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1583           *wait = 1;
1584           return (IP_NEIGHBOR_AGE_DEAD);
1585         }
1586       else
1587         {
1588           ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
1589                                  vlib_get_thread_index (), af,
1590                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
1591
1592           ipn->ipn_n_probes++;
1593           *wait = 1;
1594         }
1595     }
1596   else
1597     {
1598       /* here we are sure that ttl <= ipndb_age */
1599       *wait = ipndb_age - ttl + 1;
1600       return (IP_NEIGHBOR_AGE_ALIVE);
1601     }
1602
1603   return (IP_NEIGHBOR_AGE_PROBE);
1604 }
1605
/* Events understood by the neighbor age processes */
typedef enum ip_neighbor_process_event_t_
{
  /* signalled by ip_neighbor_config() so the process re-reads the age */
  IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
} ip_neighbor_process_event_t;
1610
/*
 * Body of the per address family neighbor age process.
 *
 * The process sleeps until either its timeout expires or it is sent an
 * event, then:
 *  - on timer expiry, walks the family's neighbor list from the oldest
 *    entry, probing aged neighbors and destroying dead ones, and works
 *    out how long to sleep next;
 *  - on IP_NEIGHBOR_AGE_PROCESS_WAKEUP, re-reads the configured age and
 *    recomputes the sleep time from the oldest neighbor.
 *
 * The loop never exits; the trailing return is not reached.
 */
static uword
ip_neighbor_age_loop (vlib_main_t * vm,
                      vlib_node_runtime_t * rt,
                      vlib_frame_t * f, ip_address_family_t af)
{
  uword event_type, *event_data = NULL;
  f64 timeout;

  /* Start with a timeout of 0, which below means: wait for an event only,
   * without a clock */
  timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;

  while (1)
    {
      f64 now;

      /* a zero timeout means there is nothing to age; sleep until told */
      if (!timeout)
        vlib_process_wait_for_event (vm);
      else
        vlib_process_wait_for_event_or_clock (vm, timeout);

      /* only the event type is of interest, the event data is discarded */
      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      now = vlib_time_now (vm);

      switch (event_type)
        {
        case ~0:
          {
            /* timer expired */
            ip_neighbor_elt_t *elt, *head;
            f64 wait;

            /* default sleep, shortened below by what the walk finds */
            timeout = ip_neighbor_db[af].ipndb_age;
            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);

          /* the list is time sorted, newest first, so start from the back
           * and work forwards. Stop when we get to one that is alive */
          restart:
          clib_llist_foreach_reverse(ip_neighbor_elt_pool,
                                     ipne_anchor, head, elt,
          ({
            ip_neighbor_age_state_t res;

            res = ip_neighbour_age_out(elt->ipne_index, now, &wait);

            if (IP_NEIGHBOR_AGE_ALIVE == res) {
              /* the oldest neighbor has not yet expired, go back to sleep */
              timeout = clib_min (wait, timeout);
              break;
            }
            else if (IP_NEIGHBOR_AGE_DEAD == res) {
              /* the oldest neighbor is dead, pop it, then restart the walk
               * again from the back */
              ip_neighbor_destroy (ip_neighbor_get(elt->ipne_index));
              goto restart;
            }

            /* a probe was sent; keep walking towards newer neighbors */
            timeout = clib_min (wait, timeout);
          }));
            break;
          }
        case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
          {

            if (!ip_neighbor_db[af].ipndb_age)
              {
                /* aging has been disabled */
                timeout = 0;
                break;
              }
            ip_neighbor_elt_t *elt, *head;

            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);
            /* no neighbors yet */
            if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
              {
                timeout = ip_neighbor_db[af].ipndb_age;
                break;
              }

            /* poke the oldest neighbour for aging, which returns how long we
             * sleep for. The returned state is ignored here: a neighbor found
             * dead is not destroyed on this path */
            elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
            ip_neighbour_age_out (elt->ipne_index, now, &timeout);
            break;
          }
        }
    }
  return 0;
}
1703
1704 static uword
1705 ip4_neighbor_age_process (vlib_main_t * vm,
1706                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1707 {
1708   return (ip_neighbor_age_loop (vm, rt, f, AF_IP4));
1709 }
1710
1711 static uword
1712 ip6_neighbor_age_process (vlib_main_t * vm,
1713                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1714 {
1715   return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
1716 }
1717
1718 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1719   .function = ip4_neighbor_age_process,
1720   .type = VLIB_NODE_TYPE_PROCESS,
1721   .name = "ip4-neighbor-age-process",
1722 };
1723 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1724   .function = ip6_neighbor_age_process,
1725   .type = VLIB_NODE_TYPE_PROCESS,
1726   .name = "ip6-neighbor-age-process",
1727 };
1728
1729 int
1730 ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
1731 {
1732   ip_neighbor_db[af].ipndb_limit = limit;
1733   ip_neighbor_db[af].ipndb_recycle = recycle;
1734   ip_neighbor_db[af].ipndb_age = age;
1735
1736   vlib_process_signal_event (vlib_get_main (),
1737                              (AF_IP4 == af ?
1738                               ip4_neighbor_age_process_node.index :
1739                               ip6_neighbor_age_process_node.index),
1740                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1741
1742   return (0);
1743 }
1744
1745 int
1746 ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age,
1747                         bool *recycle)
1748 {
1749   *limit = ip_neighbor_db[af].ipndb_limit;
1750   *age = ip_neighbor_db[af].ipndb_age;
1751   *recycle = ip_neighbor_db[af].ipndb_recycle;
1752
1753   return (0);
1754 }
1755
1756 static clib_error_t *
1757 ip_neighbor_config_show (vlib_main_t * vm,
1758                          unformat_input_t * input, vlib_cli_command_t * cmd)
1759 {
1760   ip_address_family_t af;
1761
1762   FOR_EACH_IP_ADDRESS_FAMILY(af) {
1763     vlib_cli_output (vm, "%U:", format_ip_address_family, af);
1764     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1765                      ip_neighbor_db[af].ipndb_limit,
1766                      ip_neighbor_db[af].ipndb_age,
1767                      ip_neighbor_db[af].ipndb_recycle);
1768   }
1769
1770   return (NULL);
1771 }
1772
1773 static clib_error_t *
1774 ip_neighbor_config_set (vlib_main_t *vm, unformat_input_t *input,
1775                         vlib_cli_command_t *cmd)
1776 {
1777   unformat_input_t _line_input, *line_input = &_line_input;
1778   clib_error_t *error = NULL;
1779   ip_address_family_t af;
1780   u32 limit, age;
1781   bool recycle;
1782
1783   if (!unformat_user (input, unformat_line_input, line_input))
1784     return 0;
1785
1786   if (!unformat (line_input, "%U", unformat_ip_address_family, &af))
1787     {
1788       error = unformat_parse_error (line_input);
1789       goto done;
1790     }
1791
1792   limit = ip_neighbor_db[af].ipndb_limit;
1793   age = ip_neighbor_db[af].ipndb_age;
1794   recycle = ip_neighbor_db[af].ipndb_recycle;
1795
1796   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1797     {
1798       if (unformat (line_input, "limit %u", &limit))
1799         ;
1800       else if (unformat (line_input, "age %u", &age))
1801         ;
1802       else if (unformat (line_input, "recycle"))
1803         recycle = true;
1804       else if (unformat (line_input, "norecycle"))
1805         recycle = false;
1806       else
1807         {
1808           error = unformat_parse_error (line_input);
1809           goto done;
1810         }
1811     }
1812
1813   ip_neighbor_config (af, limit, age, recycle);
1814
1815 done:
1816   unformat_free (line_input);
1817   return error;
1818 }
1819
1820 static void
1821 ip_neighbor_stats_show_one (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index)
1822 {
1823   vlib_cli_output (vm, "  %U", format_vnet_sw_if_index_name, vnm, sw_if_index);
1824   vlib_cli_output (vm, "    arp:%U", format_ip_neighbor_counters,
1825                    &ip_neighbor_counters[AF_IP4], sw_if_index);
1826   vlib_cli_output (vm, "    nd: %U", format_ip_neighbor_counters,
1827                    &ip_neighbor_counters[AF_IP6], sw_if_index);
1828 }
1829
1830 static walk_rc_t
1831 ip_neighbor_stats_show_cb (vnet_main_t *vnm, vnet_sw_interface_t *si,
1832                            void *ctx)
1833 {
1834   ip_neighbor_stats_show_one (ctx, vnm, si->sw_if_index);
1835
1836   return (WALK_CONTINUE);
1837 }
1838
1839 static clib_error_t *
1840 ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input,
1841                         vlib_cli_command_t *cmd)
1842 {
1843   vnet_main_t *vnm;
1844   u32 sw_if_index;
1845
1846   vnm = vnet_get_main ();
1847   sw_if_index = ~0;
1848   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1849
1850   if (~0 == sw_if_index)
1851     {
1852       vnet_sw_interface_walk (vnm, ip_neighbor_stats_show_cb, vm);
1853     }
1854   else
1855     {
1856       ip_neighbor_stats_show_one (vm, vnm, sw_if_index);
1857     }
1858   return (NULL);
1859 }
1860
1861 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1862   .path = "show ip neighbor-config",
1863   .function = ip_neighbor_config_show,
1864   .short_help = "show ip neighbor-config",
1865 };
1866 VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
1867   .path = "set ip neighbor-config",
1868   .function = ip_neighbor_config_set,
1869   .short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
1870                 "[recycle|norecycle]",
1871 };
1872 VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = {
1873   .path = "show ip neighbor-stats",
1874   .function = ip_neighbor_stats_show,
1875   .short_help = "show ip neighbor-stats [interface]",
1876 };
1877
1878 static clib_error_t *
1879 ip_neighbor_init (vlib_main_t * vm)
1880 {
1881   {
1882     ip4_add_del_interface_address_callback_t cb = {
1883       .function = ip_neighbor_add_del_interface_address_v4,
1884     };
1885     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1886   }
1887   {
1888     ip6_add_del_interface_address_callback_t cb = {
1889       .function = ip_neighbor_add_del_interface_address_v6,
1890     };
1891     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1892   }
1893   {
1894     ip4_table_bind_callback_t cb = {
1895       .function = ip_neighbor_table_bind_v4,
1896     };
1897     vec_add1 (ip4_main.table_bind_callbacks, cb);
1898   }
1899   {
1900     ip6_table_bind_callback_t cb = {
1901       .function = ip_neighbor_table_bind_v6,
1902     };
1903     vec_add1 (ip6_main.table_bind_callbacks, cb);
1904   }
1905   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1906
1907   ip_address_family_t af;
1908
1909   FOR_EACH_IP_ADDRESS_FAMILY (af)
1910     ip_neighbor_list_head[af] =
1911     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1912
1913   return (NULL);
1914 }
1915
1916 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1917 {
1918   .runs_after = VLIB_INITS("ip_main_init"),
1919 };
1920
1921 /*
1922  * fd.io coding-style-patch-verification: ON
1923  *
1924  * Local Variables:
1925  * eval: (c-set-style "gnu")
1926  * End:
1927  */