http: ignore http_proxy env in tests
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
2  * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/ip/ip46_address.h>
27 #include <vnet/fib/fib_table.h>
28 #include <vnet/adj/adj_mcast.h>
29
30 ip_neighbor_counters_t ip_neighbor_counters[] =
31 {
32  [AF_IP4] = {
33    .ipnc = {
34      [VLIB_RX] = {
35         [IP_NEIGHBOR_CTR_REPLY] = {
36           .name = "arp-rx-replies",
37           .stat_segment_name = "/net/arp/rx/replies",
38         },
39         [IP_NEIGHBOR_CTR_REQUEST] = {
40           .name = "arp-rx-requests",
41           .stat_segment_name = "/net/arp/rx/requests",
42         },
43         [IP_NEIGHBOR_CTR_GRAT] = {
44           .name = "arp-rx-gratuitous",
45           .stat_segment_name = "/net/arp/rx/gratuitous",
46         },
47       },
48       [VLIB_TX] = {
49         [IP_NEIGHBOR_CTR_REPLY] = {
50           .name = "arp-tx-replies",
51           .stat_segment_name = "/net/arp/tx/replies",
52         },
53         [IP_NEIGHBOR_CTR_REQUEST] = {
54           .name = "arp-tx-requests",
55           .stat_segment_name = "/net/arp/tx/requests",
56         },
57         [IP_NEIGHBOR_CTR_GRAT] = {
58           .name = "arp-tx-gratuitous",
59           .stat_segment_name = "/net/arp/tx/gratuitous",
60         },
61       },
62             },
63  },
64  [AF_IP6] = {
65    .ipnc = {
66      [VLIB_RX] = {
67         [IP_NEIGHBOR_CTR_REPLY] = {
68           .name = "ip6-nd-rx-replies",
69           .stat_segment_name = "/net/ip6-nd/rx/replies",
70         },
71         [IP_NEIGHBOR_CTR_REQUEST] = {
72           .name = "ip6-nd-rx-requests",
73           .stat_segment_name = "/net/ip6-nd/rx/requests",
74         },
75         [IP_NEIGHBOR_CTR_GRAT] = {
76           .name = "ip6-nd-rx-gratuitous",
77           .stat_segment_name = "/net/ip6-nd/rx/gratuitous",
78         },
79       },
80       [VLIB_TX] = {
81         [IP_NEIGHBOR_CTR_REPLY] = {
82           .name = "ip6-nd-tx-replies",
83           .stat_segment_name = "/net/ip6-nd/tx/replies",
84         },
85         [IP_NEIGHBOR_CTR_REQUEST] = {
86           .name = "ip6-nd-tx-requests",
87           .stat_segment_name = "/net/ip6-nd/tx/requests",
88         },
89         [IP_NEIGHBOR_CTR_GRAT] = {
90           .name = "ip6-nd-tx-gratuitous",
91           .stat_segment_name = "/net/ip6-nd/tx/gratuitous",
92         },
93       },
94     },
95  },
96 };
97
98 /** Pool for All IP neighbors */
99 static ip_neighbor_t *ip_neighbor_pool;
100
101 /** protocol specific lists of time sorted neighbors */
102 index_t ip_neighbor_list_head[N_AF];
103
104 typedef struct ip_neighbor_elt_t_
105 {
106   clib_llist_anchor_t ipne_anchor;
107   index_t ipne_index;
108 } ip_neighbor_elt_t;
109
110 /** Pool of linked list elemeents */
111 ip_neighbor_elt_t *ip_neighbor_elt_pool;
112
113 typedef struct ip_neighbor_db_t_
114 {
115   /** per interface hash */
116   uword **ipndb_hash;
117   /** per-protocol limit - max number of neighbors*/
118   u32 ipndb_limit;
119   /** max age of a neighbor before it's forcibly evicted */
120   u32 ipndb_age;
121   /** when the limit is reached and new neighbors are created, should
122    * we recycle an old one */
123   bool ipndb_recycle;
124   /** per-protocol number of elements */
125   u32 ipndb_n_elts;
126   /** per-protocol number of elements per-fib-index*/
127   u32 *ipndb_n_elts_per_fib;
128 } ip_neighbor_db_t;
129
130 static vlib_log_class_t ipn_logger;
131
132 /* DBs of neighbours one per AF */
133 static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
134   [AF_IP4] = {
135     .ipndb_limit = 50000,
136     /* Default to not aging and not recycling */
137     .ipndb_age = 0,
138     .ipndb_recycle = false,
139   },
140   [AF_IP6] = {
141     .ipndb_limit = 50000,
142     /* Default to not aging and not recycling */
143     .ipndb_age = 0,
144     .ipndb_recycle = false,
145   }
146 };
147
/*
 * Logging helpers bound to this file's log class: debug and notice level
 * respectively.
 *
 * Neither expansion ends in a semicolon; the caller supplies it. That way
 * each use is exactly one statement and can safely be the body of an
 * unbraced if/else.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
153
154 ip_neighbor_t *
155 ip_neighbor_get (index_t ipni)
156 {
157   if (pool_is_free_index (ip_neighbor_pool, ipni))
158     return (NULL);
159
160   return (pool_elt_at_index (ip_neighbor_pool, ipni));
161 }
162
163 static index_t
164 ip_neighbor_get_index (const ip_neighbor_t * ipn)
165 {
166   return (ipn - ip_neighbor_pool);
167 }
168
169 static void
170 ip_neighbor_touch (ip_neighbor_t * ipn)
171 {
172   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
173 }
174
175 static bool
176 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
177 {
178   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
179 }
180
181 const ip_address_t *
182 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
183 {
184   return (&ipn->ipn_key->ipnk_ip);
185 }
186
187 ip_address_family_t
188 ip_neighbor_get_af (const ip_neighbor_t * ipn)
189 {
190   return (ip_addr_version (&ipn->ipn_key->ipnk_ip));
191 }
192
193 const mac_address_t *
194 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
195 {
196   return (&ipn->ipn_mac);
197 }
198
199 const u32
200 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
201 {
202   return (ipn->ipn_key->ipnk_sw_if_index);
203 }
204
205 static void
206 ip_neighbor_list_remove (ip_neighbor_t * ipn)
207 {
208   /* new neighbours, are added to the head of the list, since the
209    * list is time sorted, newest first */
210   ip_neighbor_elt_t *elt;
211
212   if (~0 != ipn->ipn_elt)
213     {
214       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
215
216       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
217
218       ipn->ipn_elt = ~0;
219     }
220 }
221
222 static void
223 ip_neighbor_refresh (ip_neighbor_t * ipn)
224 {
225   /* new neighbours, are added to the head of the list, since the
226    * list is time sorted, newest first */
227   ip_neighbor_elt_t *elt, *head;
228
229   ip_neighbor_touch (ipn);
230   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
231   ipn->ipn_n_probes = 0;
232
233   if (ip_neighbor_is_dynamic (ipn))
234     {
235       if (~0 == ipn->ipn_elt)
236         /* first time insertion */
237         pool_get_zero (ip_neighbor_elt_pool, elt);
238       else
239         {
240           /* already inserted - extract first */
241           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
242
243           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
244         }
245       head = pool_elt_at_index (ip_neighbor_elt_pool,
246                                 ip_neighbor_list_head[ip_neighbor_get_af
247                                                       (ipn)]);
248
249       elt->ipne_index = ip_neighbor_get_index (ipn);
250       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
251       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
252     }
253 }
254
255 static void
256 ip_neighbor_db_add (const ip_neighbor_t * ipn)
257 {
258   ip_address_family_t af;
259   u32 sw_if_index;
260
261   af = ip_neighbor_get_af (ipn);
262   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
263
264   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
265
266   if (!ip_neighbor_db[af].ipndb_hash[sw_if_index])
267     ip_neighbor_db[af].ipndb_hash[sw_if_index]
268       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
269
270   hash_set_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index],
271                 ipn->ipn_key, ip_neighbor_get_index (ipn));
272
273   ip_neighbor_db[af].ipndb_n_elts++;
274 }
275
276 static void
277 ip_neighbor_db_remove (const ip_neighbor_t * ipn)
278 {
279   ip_address_family_t af;
280   u32 sw_if_index;
281
282   af = ip_neighbor_get_af (ipn);
283   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
284
285   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
286
287   hash_unset_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index], ipn->ipn_key);
288
289   ip_neighbor_db[af].ipndb_n_elts--;
290 }
291
292 static ip_neighbor_t *
293 ip_neighbor_db_find (const ip_neighbor_key_t * key)
294 {
295   ip_address_family_t af;
296   uword *p;
297
298   af = ip_addr_version (&key->ipnk_ip);
299
300   if (key->ipnk_sw_if_index >= vec_len (ip_neighbor_db[af].ipndb_hash))
301     return NULL;
302
303   p = hash_get_mem (ip_neighbor_db[af].ipndb_hash
304                     [key->ipnk_sw_if_index], key);
305
306   if (p)
307     return ip_neighbor_get (p[0]);
308
309   return (NULL);
310 }
311
312 static u8
313 ip_af_type_pfx_len (ip_address_family_t type)
314 {
315   return (type == AF_IP4 ? 32 : 128);
316 }
317
318 static void
319 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
320 {
321   ip_address_family_t af;
322
323   af = ip_neighbor_get_af (ipn);
324
325   if (af == AF_IP6 &&
326       ip6_address_is_link_local_unicast (&ip_addr_v6
327                                          (&ipn->ipn_key->ipnk_ip)))
328     {
329       ip6_ll_prefix_t pfx = {
330         .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
331         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
332       };
333       ipn->ipn_fib_entry_index =
334         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
335     }
336   else
337     {
338       fib_protocol_t fproto;
339
340       fproto = ip_address_family_to_fib_proto (af);
341
342       fib_prefix_t pfx = {
343         .fp_len = ip_af_type_pfx_len (af),
344         .fp_proto = fproto,
345         .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
346       };
347
348       ipn->ipn_fib_entry_index =
349         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
350                                   FIB_ENTRY_FLAG_ATTACHED,
351                                   fib_proto_to_dpo (fproto),
352                                   &pfx.fp_addr,
353                                   ipn->ipn_key->ipnk_sw_if_index,
354                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
355
356       vec_validate (ip_neighbor_db[af].ipndb_n_elts_per_fib, fib_index);
357
358       ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]++;
359
360       if (1 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
361         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
362     }
363 }
364
365 static void
366 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
367 {
368   ip_address_family_t af;
369
370   af = ip_neighbor_get_af (ipn);
371
372   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
373     {
374       if (AF_IP6 == af &&
375           ip6_address_is_link_local_unicast (&ip_addr_v6
376                                              (&ipn->ipn_key->ipnk_ip)))
377         {
378           ip6_ll_prefix_t pfx = {
379             .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
380             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
381           };
382           ip6_ll_table_entry_delete (&pfx);
383         }
384       else
385         {
386           fib_protocol_t fproto;
387
388           fproto = ip_address_family_to_fib_proto (af);
389
390           fib_prefix_t pfx = {
391             .fp_len = ip_af_type_pfx_len (af),
392             .fp_proto = fproto,
393             .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
394           };
395
396           fib_table_entry_path_remove (fib_index,
397                                        &pfx,
398                                        FIB_SOURCE_ADJ,
399                                        fib_proto_to_dpo (fproto),
400                                        &pfx.fp_addr,
401                                        ipn->ipn_key->ipnk_sw_if_index,
402                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
403
404           ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]--;
405
406           if (0 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
407             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
408         }
409     }
410 }
411
412 static void
413 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
414 {
415   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
416                           ethernet_build_rewrite (vnet_get_main (),
417                                                   ipn->
418                                                   ipn_key->ipnk_sw_if_index,
419                                                   adj_get_link_type (ai),
420                                                   ipn->ipn_mac.bytes));
421 }
422
423 static void
424 ip_neighbor_mk_incomplete (adj_index_t ai)
425 {
426   ip_adjacency_t *adj = adj_get (ai);
427
428   adj_nbr_update_rewrite (ai,
429                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
430                           ethernet_build_rewrite (vnet_get_main (),
431                                                   adj->
432                                                   rewrite_header.sw_if_index,
433                                                   VNET_LINK_ARP,
434                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
435 }
436
437 static adj_walk_rc_t
438 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
439 {
440   ip_neighbor_t *ipn = ctx;
441
442   ip_neighbor_mk_complete (ai, ipn);
443
444   return (ADJ_WALK_RC_CONTINUE);
445 }
446
447 static adj_walk_rc_t
448 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
449 {
450   ip_neighbor_mk_incomplete (ai);
451
452   return (ADJ_WALK_RC_CONTINUE);
453 }
454
455 static void
456 ip_neighbor_destroy (ip_neighbor_t * ipn)
457 {
458   ip_address_family_t af;
459
460   af = ip_neighbor_get_af (ipn);
461
462   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
463                    vlib_time_now (vlib_get_main ()),
464                    ip_neighbor_get_index (ipn));
465
466   ip_neighbor_publish (ip_neighbor_get_index (ipn),
467                        IP_NEIGHBOR_EVENT_REMOVED);
468
469   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
470                    ip_address_family_to_fib_proto (af),
471                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
472                    ip_neighbor_mk_incomplete_walk, ipn);
473   ip_neighbor_adj_fib_remove
474     (ipn,
475      fib_table_get_index_for_sw_if_index
476      (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
477
478   ip_neighbor_list_remove (ipn);
479   ip_neighbor_db_remove (ipn);
480   clib_mem_free (ipn->ipn_key);
481
482   pool_put (ip_neighbor_pool, ipn);
483 }
484
485 static bool
486 ip_neighbor_force_reuse (ip_address_family_t af)
487 {
488   if (!ip_neighbor_db[af].ipndb_recycle)
489     return false;
490
491   /* pluck the oldest entry, which is the one from the end of the list */
492   ip_neighbor_elt_t *elt, *head;
493
494   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
495
496   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
497     return (false);
498
499   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
500   ip_neighbor_destroy (ip_neighbor_get (elt->ipne_index));
501
502   return (true);
503 }
504
505 static ip_neighbor_t *
506 ip_neighbor_alloc (const ip_neighbor_key_t * key,
507                    const mac_address_t * mac, ip_neighbor_flags_t flags)
508 {
509   ip_address_family_t af;
510   ip_neighbor_t *ipn;
511
512   af = ip_addr_version (&key->ipnk_ip);
513
514   if (ip_neighbor_db[af].ipndb_limit &&
515       (ip_neighbor_db[af].ipndb_n_elts >= ip_neighbor_db[af].ipndb_limit))
516     {
517       if (!ip_neighbor_force_reuse (af))
518         return (NULL);
519     }
520
521   pool_get_zero (ip_neighbor_pool, ipn);
522
523   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
524   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
525
526   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
527   ipn->ipn_flags = flags;
528   ipn->ipn_elt = ~0;
529
530   mac_address_copy (&ipn->ipn_mac, mac);
531
532   ip_neighbor_db_add (ipn);
533
534   /* create the adj-fib. the entry in the FIB table for the peer's interface */
535   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
536     ip_neighbor_adj_fib_add
537       (ipn, fib_table_get_index_for_sw_if_index
538        (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
539
540   return (ipn);
541 }
542
543 int
544 ip_neighbor_add (const ip_address_t * ip,
545                  const mac_address_t * mac,
546                  u32 sw_if_index,
547                  ip_neighbor_flags_t flags, u32 * stats_index)
548 {
549   fib_protocol_t fproto;
550   ip_neighbor_t *ipn;
551
552   /* main thread only */
553   ASSERT (0 == vlib_get_thread_index ());
554
555   fproto = ip_address_family_to_fib_proto (ip_addr_version (ip));
556
557   const ip_neighbor_key_t key = {
558     .ipnk_ip = *ip,
559     .ipnk_sw_if_index = sw_if_index,
560   };
561
562   ipn = ip_neighbor_db_find (&key);
563
564   if (ipn)
565     {
566       IP_NEIGHBOR_DBG ("update: %U, %U",
567                        format_vnet_sw_if_index_name, vnet_get_main (),
568                        sw_if_index, format_ip_address, ip,
569                        format_ip_neighbor_flags, flags, format_mac_address_t,
570                        mac);
571
572       ip_neighbor_touch (ipn);
573
574       /* Refuse to over-write static neighbor entry. */
575       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
576           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
577         {
578           /* if MAC address match, still check to send event */
579           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
580             goto check_customers;
581           return -2;
582         }
583
584       /* A dynamic entry can become static, but not vice-versa.
585        * i.e. since if it was programmed by the CP then it must
586        * be removed by the CP */
587       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
588           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
589         {
590           ip_neighbor_list_remove (ipn);
591           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
592           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
593         }
594
595       /*
596        * prevent a DoS attack from the data-plane that
597        * spams us with no-op updates to the MAC address
598        */
599       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
600         {
601           ip_neighbor_refresh (ipn);
602           goto check_customers;
603         }
604
605       mac_address_copy (&ipn->ipn_mac, mac);
606     }
607   else
608     {
609       IP_NEIGHBOR_INFO ("add: %U, %U",
610                         format_vnet_sw_if_index_name, vnet_get_main (),
611                         sw_if_index, format_ip_address, ip,
612                         format_ip_neighbor_flags, flags, format_mac_address_t,
613                         mac);
614
615       ipn = ip_neighbor_alloc (&key, mac, flags);
616
617       if (NULL == ipn)
618         return VNET_API_ERROR_LIMIT_EXCEEDED;
619     }
620
621   /* Update time stamp and flags. */
622   ip_neighbor_refresh (ipn);
623
624   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
625                    fproto, &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
626                    ip_neighbor_mk_complete_walk, ipn);
627
628 check_customers:
629   /* Customer(s) requesting event for this address? */
630   ip_neighbor_publish (ip_neighbor_get_index (ipn), IP_NEIGHBOR_EVENT_ADDED);
631
632   if (stats_index)
633     *stats_index = adj_nbr_find (fproto,
634                                  fib_proto_to_link (fproto),
635                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
636                                  ipn->ipn_key->ipnk_sw_if_index);
637   return 0;
638 }
639
640 int
641 ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index)
642 {
643   ip_neighbor_t *ipn;
644
645   /* main thread only */
646   ASSERT (0 == vlib_get_thread_index ());
647
648   IP_NEIGHBOR_INFO ("delete: %U, %U",
649                     format_vnet_sw_if_index_name, vnet_get_main (),
650                     sw_if_index, format_ip_address, ip);
651
652   const ip_neighbor_key_t key = {
653     .ipnk_ip = *ip,
654     .ipnk_sw_if_index = sw_if_index,
655   };
656
657   ipn = ip_neighbor_db_find (&key);
658
659   if (NULL == ipn)
660     return (VNET_API_ERROR_NO_SUCH_ENTRY);
661
662   ip_neighbor_destroy (ipn);
663
664   return (0);
665 }
666
667 typedef struct ip_neighbor_del_all_ctx_t_
668 {
669   index_t *ipn_del;
670 } ip_neighbor_del_all_ctx_t;
671
672 static walk_rc_t
673 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
674 {
675   ip_neighbor_del_all_ctx_t *ctx = arg;
676
677   vec_add1 (ctx->ipn_del, ipni);
678
679   return (WALK_CONTINUE);
680 }
681
682 void
683 ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index)
684 {
685   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
686                     format_ip_address_family, af,
687                     format_vnet_sw_if_index_name, vnet_get_main (),
688                     sw_if_index);
689
690   ip_neighbor_del_all_ctx_t ctx = {
691     .ipn_del = NULL,
692   };
693   index_t *ipni;
694
695   ip_neighbor_walk (af, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
696
697   vec_foreach (ipni,
698                ctx.ipn_del) ip_neighbor_destroy (ip_neighbor_get (*ipni));
699   vec_free (ctx.ipn_del);
700 }
701
702 void
703 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
704 {
705   ip_neighbor_t *ipn;
706   ip_adjacency_t *adj;
707
708   adj = adj_get (ai);
709
710   ip_neighbor_key_t key = {
711     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
712   };
713
714   ip_address_from_46 (&adj->sub_type.nbr.next_hop,
715                       adj->ia_nh_proto, &key.ipnk_ip);
716
717   ipn = ip_neighbor_db_find (&key);
718
719   switch (adj->lookup_next_index)
720     {
721     case IP_LOOKUP_NEXT_ARP:
722       if (NULL != ipn)
723         {
724           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
725                            adj->ia_nh_proto,
726                            &adj->sub_type.nbr.next_hop,
727                            ip_neighbor_mk_complete_walk, ipn);
728         }
729       else
730         {
731           /*
732            * no matching ARP entry.
733            * construct the rewrite required to for an ARP packet, and stick
734            * that in the adj's pipe to smoke.
735            */
736           adj_nbr_update_rewrite
737             (ai,
738              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
739              ethernet_build_rewrite
740              (vnm,
741               adj->rewrite_header.sw_if_index,
742               VNET_LINK_ARP,
743               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
744
745           /*
746            * since the FIB has added this adj for a route, it makes sense it
747            * may want to forward traffic sometime soon. Let's send a
748            * speculative ARP. just one. If we were to do periodically that
749            * wouldn't be bad either, but that's more code than i'm prepared to
750            * write at this time for relatively little reward.
751            */
752           /*
753            * adj_nbr_update_rewrite may actually call fib_walk_sync.
754            * fib_walk_sync may allocate a new adjacency and potentially cause
755            * a realloc for adj_pool. When that happens, adj pointer is no
756            * longer valid here.x We refresh adj pointer accordingly.
757            */
758           adj = adj_get (ai);
759           ip_neighbor_probe (adj);
760         }
761       break;
762     case IP_LOOKUP_NEXT_REWRITE:
763       /* Update of an existing rewrite adjacency happens e.g. when the
764        * interface's MAC address changes */
765       if (NULL != ipn)
766         ip_neighbor_mk_complete (ai, ipn);
767       break;
768     case IP_LOOKUP_NEXT_GLEAN:
769     case IP_LOOKUP_NEXT_BCAST:
770     case IP_LOOKUP_NEXT_MCAST:
771     case IP_LOOKUP_NEXT_DROP:
772     case IP_LOOKUP_NEXT_PUNT:
773     case IP_LOOKUP_NEXT_LOCAL:
774     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
775     case IP_LOOKUP_NEXT_MIDCHAIN:
776     case IP_LOOKUP_NEXT_ICMP_ERROR:
777     case IP_LOOKUP_N_NEXT:
778       ASSERT (0);
779       break;
780     }
781 }
782
783 void
784 ip_neighbor_learn (const ip_neighbor_learn_t * l)
785 {
786   ip_neighbor_add (&l->ip, &l->mac, l->sw_if_index,
787                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
788 }
789
790 static clib_error_t *
791 ip_neighbor_cmd (vlib_main_t * vm,
792                  unformat_input_t * input, vlib_cli_command_t * cmd)
793 {
794   ip_address_t ip = IP_ADDRESS_V6_ALL_0S;
795   mac_address_t mac = ZERO_MAC_ADDRESS;
796   vnet_main_t *vnm = vnet_get_main ();
797   ip_neighbor_flags_t flags;
798   u32 sw_if_index = ~0;
799   int is_add = 1, is_flush = 0;
800   int count = 1;
801
802   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
803
804   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
805     {
806       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
807       if (unformat (input, "%U %U %U",
808                     unformat_vnet_sw_interface, vnm, &sw_if_index,
809                     unformat_ip_address, &ip, unformat_mac_address_t, &mac))
810         ;
811       else if (unformat (input, "delete") || unformat (input, "del"))
812         is_add = 0;
813       else if (unformat (input, "flush"))
814         is_flush = 1;
815       else if (unformat (input, "static"))
816         {
817           flags |= IP_NEIGHBOR_FLAG_STATIC;
818           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
819         }
820       else if (unformat (input, "no-fib-entry"))
821         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
822       else if (unformat (input, "count %d", &count))
823         ;
824       else
825         break;
826     }
827
828   if (is_flush)
829     {
830       ip_neighbor_del_all (AF_IP4, sw_if_index);
831       ip_neighbor_del_all (AF_IP6, sw_if_index);
832       return NULL;
833     }
834
835   if (sw_if_index == ~0 ||
836       ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
837     return clib_error_return (0,
838                               "specify interface, IP address and MAC: `%U'",
839                               format_unformat_error, input);
840
841   while (count)
842     {
843       if (is_add)
844         ip_neighbor_add (&ip, &mac, sw_if_index, flags, NULL);
845       else
846         ip_neighbor_del (&ip, sw_if_index);
847
848       ip_address_increment (&ip);
849       mac_address_increment (&mac);
850
851       --count;
852     }
853
854   return NULL;
855 }
856
857 /*?
858  * Add or delete IPv4 ARP cache entries.
859  *
860  * @note 'set ip neighbor' options (e.g. delete, static,
861  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
862  * any order and combination.
863  *
864  * @cliexpar
865  * @parblock
866  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
867  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
868  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
869  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
870  * de:ad:be:ef:ba:be}
871  *
872  * The same add and delete, this time with the MAC address given in
873  * aabb.ccdd.eeff format for both commands:
874  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
875  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
876  * dead.beef.babe}
877  *
878  * Add or delete IPv4 static ARP cache entries as follows:
879  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3
880  * dead.beef.babe}
881  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3
882  * dead.beef.babe}
883  *
884  * For testing / debugging purposes, the 'set ip neighbor' command can add or
885  * delete multiple entries. Supply the 'count N' parameter:
886  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3
887  * dead.beef.babe}
888  * @endparblock
889  ?*/
890 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
891   .path = "set ip neighbor",
892   .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> "
893                 "[static] [no-fib-entry] [count <count>]",
894   .function = ip_neighbor_cmd,
895 };
896 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
897   .path = "ip neighbor",
898   .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> "
899                 "[static] [no-fib-entry] [count <count>]",
900   .function = ip_neighbor_cmd,
901 };
902
903 static int
904 ip_neighbor_sort (void *a1, void *a2)
905 {
906   index_t *ipni1 = a1, *ipni2 = a2;
907   ip_neighbor_t *ipn1, *ipn2;
908   int cmp;
909
910   ipn1 = ip_neighbor_get (*ipni1);
911   ipn2 = ip_neighbor_get (*ipni2);
912
913   cmp = vnet_sw_interface_compare (vnet_get_main (),
914                                    ipn1->ipn_key->ipnk_sw_if_index,
915                                    ipn2->ipn_key->ipnk_sw_if_index);
916   if (!cmp)
917     cmp = ip_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
918   return cmp;
919 }
920
921 static index_t *
922 ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
923 {
924   index_t *ipnis = NULL;
925   ip_neighbor_t *ipn;
926
927   pool_foreach (ipn, ip_neighbor_pool)
928    {
929     if ((sw_if_index == ~0 ||
930         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
931         (N_AF == af ||
932          ip_neighbor_get_af(ipn) == af))
933        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
934   }
935
936
937   if (ipnis)
938     vec_sort_with_function (ipnis, ip_neighbor_sort);
939   return ipnis;
940 }
941
942 static clib_error_t *
943 ip_neighbor_show_sorted_i (vlib_main_t * vm,
944                            unformat_input_t * input,
945                            vlib_cli_command_t * cmd, ip_address_family_t af)
946 {
947   ip_neighbor_elt_t *elt, *head;
948   f64 now;
949
950   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
951   now = vlib_time_now (vm);
952
953   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags",
954                    "Ethernet", "Interface");
955
956   /* the list is time sorted, newest first, so start from the back
957    * and work forwards. Stop when we get to one that is alive */
958   clib_llist_foreach_reverse (ip_neighbor_elt_pool, ipne_anchor, head, elt, ({
959                                 vlib_cli_output (vm, "%U", format_ip_neighbor,
960                                                  now, elt->ipne_index);
961                               }));
962
963   return (NULL);
964 }
965
966 static clib_error_t *
967 ip_neighbor_show_i (vlib_main_t * vm,
968                     unformat_input_t * input,
969                     vlib_cli_command_t * cmd, ip_address_family_t af)
970 {
971   index_t *ipni, *ipnis = NULL;
972   u32 sw_if_index;
973   f64 now;
974
975   /* Filter entries by interface if given. */
976   sw_if_index = ~0;
977   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
978                         &sw_if_index);
979
980   ipnis = ip_neighbor_entries (sw_if_index, af);
981   now = vlib_time_now (vm);
982
983   if (ipnis)
984     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags",
985                      "Ethernet", "Interface");
986
987   vec_foreach (ipni, ipnis)
988   {
989     vlib_cli_output (vm, "%U", format_ip_neighbor, now, *ipni);
990   }
991   vec_free (ipnis);
992
993   return (NULL);
994 }
995
996 static clib_error_t *
997 ip_neighbor_show (vlib_main_t * vm,
998                   unformat_input_t * input, vlib_cli_command_t * cmd)
999 {
1000   return (ip_neighbor_show_i (vm, input, cmd, N_AF));
1001 }
1002
1003 static clib_error_t *
1004 ip6_neighbor_show (vlib_main_t * vm,
1005                    unformat_input_t * input, vlib_cli_command_t * cmd)
1006 {
1007   return (ip_neighbor_show_i (vm, input, cmd, AF_IP6));
1008 }
1009
1010 static clib_error_t *
1011 ip4_neighbor_show (vlib_main_t * vm,
1012                    unformat_input_t * input, vlib_cli_command_t * cmd)
1013 {
1014   return (ip_neighbor_show_i (vm, input, cmd, AF_IP4));
1015 }
1016
1017 static clib_error_t *
1018 ip6_neighbor_show_sorted (vlib_main_t * vm,
1019                           unformat_input_t * input, vlib_cli_command_t * cmd)
1020 {
1021   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP6));
1022 }
1023
1024 static clib_error_t *
1025 ip4_neighbor_show_sorted (vlib_main_t * vm,
1026                           unformat_input_t * input, vlib_cli_command_t * cmd)
1027 {
1028   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP4));
1029 }
1030
1031 /*?
1032  * Display all the IP neighbor entries.
1033  *
1034  * @cliexpar
1035  * Example of how to display the IPv4 ARP table:
1036  * @cliexstart{show ip neighbor}
1037  *    Time      FIB        IP4       Flags      Ethernet              Interface
1038  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1039  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1040  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1041  * Proxy arps enabled for:
1042  * Fib_index 0   6.0.0.1 - 6.0.0.11
1043  * @cliexend
1044  ?*/
1045 VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
1046   .path = "show ip neighbors",
1047   .function = ip_neighbor_show,
1048   .short_help = "show ip neighbors [interface]",
1049 };
1050 VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
1051   .path = "show ip4 neighbors",
1052   .function = ip4_neighbor_show,
1053   .short_help = "show ip4 neighbors [interface]",
1054 };
1055 VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
1056   .path = "show ip6 neighbors",
1057   .function = ip6_neighbor_show,
1058   .short_help = "show ip6 neighbors [interface]",
1059 };
1060 VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
1061   .path = "show ip neighbor",
1062   .function = ip_neighbor_show,
1063   .short_help = "show ip neighbor [interface]",
1064 };
1065 VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
1066   .path = "show ip4 neighbor",
1067   .function = ip4_neighbor_show,
1068   .short_help = "show ip4 neighbor [interface]",
1069 };
1070 VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
1071   .path = "show ip6 neighbor",
1072   .function = ip6_neighbor_show,
1073   .short_help = "show ip6 neighbor [interface]",
1074 };
1075 VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
1076   .path = "show ip4 neighbor-sorted",
1077   .function = ip4_neighbor_show_sorted,
1078   .short_help = "show ip4 neighbor-sorted",
1079 };
1080 VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
1081   .path = "show ip6 neighbor-sorted",
1082   .function = ip6_neighbor_show_sorted,
1083   .short_help = "show ip6 neighbor-sorted",
1084 };
1085
1086 static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
1087
1088 void
1089 ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
1090 {
1091   ip_nbr_vfts[af] = *vft;
1092 }
1093
1094 void
1095 ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
1096                        ip_address_family_t af, const ip46_address_t *dst)
1097 {
1098   if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
1099     return;
1100
1101   switch (af)
1102     {
1103     case AF_IP6:
1104       ip6_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip6);
1105       break;
1106     case AF_IP4:
1107       ip4_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip4);
1108       break;
1109     }
1110 }
1111
1112 void
1113 ip_neighbor_probe (const ip_adjacency_t * adj)
1114 {
1115   ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
1116                          vlib_get_thread_index (),
1117                          ip_address_family_from_fib_proto (adj->ia_nh_proto),
1118                          &adj->sub_type.nbr.next_hop);
1119 }
1120
1121 void
1122 ip_neighbor_walk (ip_address_family_t af,
1123                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1124 {
1125   ip_neighbor_key_t *key;
1126   index_t ipni;
1127
1128   if (~0 == sw_if_index)
1129     {
1130       uword **hash;
1131
1132       vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
1133       {
1134           hash_foreach (key, ipni, *hash,
1135           ({
1136             if (WALK_STOP == cb (ipni, ctx))
1137               break;
1138           }));
1139       }
1140     }
1141   else
1142     {
1143       uword *hash;
1144
1145       if (vec_len (ip_neighbor_db[af].ipndb_hash) <= sw_if_index)
1146         return;
1147       hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
1148
1149       hash_foreach (key, ipni, hash,
1150       ({
1151         if (WALK_STOP == cb (ipni, ctx))
1152           break;
1153       }));
1154     }
1155 }
1156
1157 int
1158 ip4_neighbor_proxy_add (u32 fib_index,
1159                         const ip4_address_t * start,
1160                         const ip4_address_t * end)
1161 {
1162   if (ip_nbr_vfts[AF_IP4].inv_proxy4_add)
1163     {
1164       return (ip_nbr_vfts[AF_IP4].inv_proxy4_add (fib_index, start, end));
1165     }
1166
1167   return (-1);
1168 }
1169
1170 int
1171 ip4_neighbor_proxy_delete (u32 fib_index,
1172                            const ip4_address_t * start,
1173                            const ip4_address_t * end)
1174 {
1175   if (ip_nbr_vfts[AF_IP4].inv_proxy4_del)
1176     {
1177       return (ip_nbr_vfts[AF_IP4].inv_proxy4_del (fib_index, start, end));
1178     }
1179   return -1;
1180 }
1181
1182 int
1183 ip4_neighbor_proxy_enable (u32 sw_if_index)
1184 {
1185   if (ip_nbr_vfts[AF_IP4].inv_proxy4_enable)
1186     {
1187       return (ip_nbr_vfts[AF_IP4].inv_proxy4_enable (sw_if_index));
1188     }
1189   return -1;
1190 }
1191
1192 int
1193 ip4_neighbor_proxy_disable (u32 sw_if_index)
1194 {
1195   if (ip_nbr_vfts[AF_IP4].inv_proxy4_disable)
1196     {
1197       return (ip_nbr_vfts[AF_IP4].inv_proxy4_disable (sw_if_index));
1198     }
1199   return -1;
1200 }
1201
1202 int
1203 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1204 {
1205   if (ip_nbr_vfts[AF_IP6].inv_proxy6_add)
1206     {
1207       return (ip_nbr_vfts[AF_IP6].inv_proxy6_add (sw_if_index, addr));
1208     }
1209   return -1;
1210 }
1211
1212 int
1213 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1214 {
1215   if (ip_nbr_vfts[AF_IP6].inv_proxy6_del)
1216     {
1217       return (ip_nbr_vfts[AF_IP6].inv_proxy6_del (sw_if_index, addr));
1218     }
1219   return -1;
1220 }
1221
1222 void
1223 ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
1224 {
1225   index_t *ipnis = NULL, *ipni;
1226   ip_neighbor_t *ipn;
1227
1228   IP_NEIGHBOR_DBG ("populate: %U %U",
1229                    format_vnet_sw_if_index_name, vnet_get_main (),
1230                    sw_if_index, format_ip_address_family, af);
1231
1232   pool_foreach (ipn, ip_neighbor_pool)
1233    {
1234     if (ip_neighbor_get_af(ipn) == af &&
1235         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1236       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1237   }
1238
1239   vec_foreach (ipni, ipnis)
1240   {
1241     ipn = ip_neighbor_get (*ipni);
1242
1243     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1244                      ip_address_family_to_fib_proto (ip_neighbor_get_af
1245                                                      (ipn)),
1246                      &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
1247                      ip_neighbor_mk_complete_walk, ipn);
1248   }
1249   vec_free (ipnis);
1250 }
1251
1252 void
1253 ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
1254 {
1255   index_t *ipnis = NULL, *ipni;
1256   ip_neighbor_t *ipn;
1257
1258
1259   IP_NEIGHBOR_DBG ("flush: %U %U",
1260                    format_vnet_sw_if_index_name, vnet_get_main (),
1261                    sw_if_index, format_ip_address_family, af);
1262
1263   pool_foreach (ipn, ip_neighbor_pool)
1264    {
1265     if (ip_neighbor_get_af(ipn) == af &&
1266         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1267         ip_neighbor_is_dynamic (ipn))
1268       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1269   }
1270
1271   vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
1272   vec_free (ipnis);
1273 }
1274
1275 walk_rc_t
1276 ip_neighbor_mark_one (index_t ipni, void *ctx)
1277 {
1278   ip_neighbor_t *ipn;
1279
1280   ipn = ip_neighbor_get (ipni);
1281
1282   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1283
1284   return (WALK_CONTINUE);
1285 }
1286
1287 void
1288 ip_neighbor_mark (ip_address_family_t af)
1289 {
1290   ip_neighbor_walk (af, ~0, ip_neighbor_mark_one, NULL);
1291 }
1292
1293 typedef struct ip_neighbor_sweep_ctx_t_
1294 {
1295   index_t *ipnsc_stale;
1296 } ip_neighbor_sweep_ctx_t;
1297
1298 static walk_rc_t
1299 ip_neighbor_sweep_one (index_t ipni, void *arg)
1300 {
1301   ip_neighbor_sweep_ctx_t *ctx = arg;
1302   ip_neighbor_t *ipn;
1303
1304   ipn = ip_neighbor_get (ipni);
1305
1306   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1307     {
1308       vec_add1 (ctx->ipnsc_stale, ipni);
1309     }
1310
1311   return (WALK_CONTINUE);
1312 }
1313
1314 void
1315 ip_neighbor_sweep (ip_address_family_t af)
1316 {
1317   ip_neighbor_sweep_ctx_t ctx = { };
1318   index_t *ipni;
1319
1320   ip_neighbor_walk (af, ~0, ip_neighbor_sweep_one, &ctx);
1321
1322   vec_foreach (ipni, ctx.ipnsc_stale)
1323   {
1324     ip_neighbor_destroy (ip_neighbor_get (*ipni));
1325   }
1326   vec_free (ctx.ipnsc_stale);
1327 }
1328
1329 /*
1330  * Remove any arp entries associated with the specified interface
1331  */
1332 static clib_error_t *
1333 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1334                                     u32 sw_if_index, u32 flags)
1335 {
1336   ip_address_family_t af;
1337
1338   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1339                    format_vnet_sw_if_index_name, vnet_get_main (),
1340                    sw_if_index,
1341                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1342
1343   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1344     {
1345       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_populate (af, sw_if_index);
1346     }
1347   else
1348     {
1349       /* admin down, flush all neighbours */
1350       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1351     }
1352
1353   return (NULL);
1354 }
1355
1356 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1357
1358 /*
1359  * Remove any arp entries associated with the specified interface
1360  */
1361 static clib_error_t *
1362 ip_neighbor_add_del_sw_interface (vnet_main_t *vnm, u32 sw_if_index,
1363                                   u32 is_add)
1364 {
1365   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1366                    format_vnet_sw_if_index_name, vnet_get_main (),
1367                    sw_if_index, (is_add ? "add" : "del"));
1368
1369   if (!is_add && sw_if_index != ~0)
1370     {
1371       ip_address_family_t af;
1372
1373       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1374     }
1375
1376   if (is_add)
1377     {
1378       ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP4], sw_if_index);
1379       ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP6], sw_if_index);
1380     }
1381
1382   return (NULL);
1383 }
1384
1385 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_add_del_sw_interface);
1386
1387 typedef struct ip_neighbor_walk_covered_ctx_t_
1388 {
1389   ip_address_t addr;
1390   u32 length;
1391   index_t *ipnis;
1392 } ip_neighbor_walk_covered_ctx_t;
1393
1394 static walk_rc_t
1395 ip_neighbor_walk_covered (index_t ipni, void *arg)
1396 {
1397   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1398   ip_neighbor_t *ipn;
1399
1400   ipn = ip_neighbor_get (ipni);
1401
1402   if (AF_IP4 == ip_addr_version (&ctx->addr))
1403     {
1404       if (ip4_destination_matches_route (&ip4_main,
1405                                          &ip_addr_v4 (&ipn->ipn_key->ipnk_ip),
1406                                          &ip_addr_v4 (&ctx->addr),
1407                                          ctx->length) &&
1408           ip_neighbor_is_dynamic (ipn))
1409         {
1410           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1411         }
1412     }
1413   else if (AF_IP6 == ip_addr_version (&ctx->addr))
1414     {
1415       if (ip6_destination_matches_route (&ip6_main,
1416                                          &ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
1417                                          &ip_addr_v6 (&ctx->addr),
1418                                          ctx->length) &&
1419           ip_neighbor_is_dynamic (ipn))
1420         {
1421           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1422         }
1423     }
1424   return (WALK_CONTINUE);
1425 }
1426
1427
1428 /*
1429  * callback when an interface address is added or deleted
1430  */
1431 static void
1432 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1433                                           uword opaque,
1434                                           u32 sw_if_index,
1435                                           ip4_address_t * address,
1436                                           u32 address_length,
1437                                           u32 if_address_index, u32 is_del)
1438 {
1439   /*
1440    * Flush the ARP cache of all entries covered by the address
1441    * that is being removed.
1442    */
1443   IP_NEIGHBOR_DBG ("addr-%s: %U, %U/%d", (is_del ? "del" : "add"),
1444                    format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index,
1445                    format_ip4_address, address, address_length);
1446
1447   if (is_del)
1448     {
1449       ip_neighbor_walk_covered_ctx_t ctx = {
1450         .addr = {
1451           .ip.ip4 = *address,
1452           .version = AF_IP4,
1453         },
1454         .length = address_length,
1455       };
1456       index_t *ipni;
1457
1458       ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
1459
1460       vec_foreach (ipni, ctx.ipnis)
1461         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1462
1463       vec_free (ctx.ipnis);
1464     }
1465 }
1466
1467 /*
1468  * callback when an interface address is added or deleted
1469  */
1470 static void
1471 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1472                                           uword opaque,
1473                                           u32 sw_if_index,
1474                                           ip6_address_t * address,
1475                                           u32 address_length,
1476                                           u32 if_address_index, u32 is_del)
1477 {
1478   /*
1479    * Flush the ARP cache of all entries covered by the address
1480    * that is being removed.
1481    */
1482   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1483                    format_vnet_sw_if_index_name, vnet_get_main (),
1484                    sw_if_index, format_ip6_address, address, address_length,
1485                    (is_del ? "del" : "add"));
1486
1487   if (is_del)
1488     {
1489       ip_neighbor_walk_covered_ctx_t ctx = {
1490         .addr = {
1491           .ip.ip6 = *address,
1492           .version = AF_IP6,
1493         },
1494         .length = address_length,
1495       };
1496       index_t *ipni;
1497
1498       ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
1499
1500       vec_foreach (ipni, ctx.ipnis)
1501         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1502
1503       vec_free (ctx.ipnis);
1504     }
1505 }
1506
1507 typedef struct ip_neighbor_table_bind_ctx_t_
1508 {
1509   u32 new_fib_index;
1510   u32 old_fib_index;
1511 } ip_neighbor_table_bind_ctx_t;
1512
1513 static walk_rc_t
1514 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1515 {
1516   ip_neighbor_table_bind_ctx_t *ctx = arg;
1517   ip_neighbor_t *ipn;
1518
1519   ipn = ip_neighbor_get (ipni);
1520   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1521   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1522
1523   return (WALK_CONTINUE);
1524 }
1525
1526 static void
1527 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1528                            uword opaque,
1529                            u32 sw_if_index,
1530                            u32 new_fib_index, u32 old_fib_index)
1531 {
1532   ip_neighbor_table_bind_ctx_t ctx = {
1533     .old_fib_index = old_fib_index,
1534     .new_fib_index = new_fib_index,
1535   };
1536
1537   ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1538 }
1539
1540 static void
1541 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1542                            uword opaque,
1543                            u32 sw_if_index,
1544                            u32 new_fib_index, u32 old_fib_index)
1545 {
1546   ip_neighbor_table_bind_ctx_t ctx = {
1547     .old_fib_index = old_fib_index,
1548     .new_fib_index = new_fib_index,
1549   };
1550
1551   ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1552 }
1553
1554 typedef enum ip_neighbor_age_state_t_
1555 {
1556   IP_NEIGHBOR_AGE_ALIVE,
1557   IP_NEIGHBOR_AGE_PROBE,
1558   IP_NEIGHBOR_AGE_DEAD,
1559 } ip_neighbor_age_state_t;
1560
1561 #define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1562
1563 static ip_neighbor_age_state_t
1564 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1565 {
1566   ip_address_family_t af;
1567   ip_neighbor_t *ipn;
1568   u32 ipndb_age;
1569   u32 ttl;
1570
1571   ipn = ip_neighbor_get (ipni);
1572   af = ip_neighbor_get_af (ipn);
1573   ipndb_age = ip_neighbor_db[af].ipndb_age;
1574   ttl = now - ipn->ipn_time_last_updated;
1575   *wait = ipndb_age;
1576
1577   if (ttl > ipndb_age)
1578     {
1579       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", format_ip_neighbor, now, ipni,
1580                        now, ipn->ipn_time_last_updated, ipndb_age);
1581       if (ipn->ipn_n_probes > 2)
1582         {
1583           /* 3 strikes and yea-re out */
1584           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, now, ipni);
1585           *wait = 1;
1586           return (IP_NEIGHBOR_AGE_DEAD);
1587         }
1588       else
1589         {
1590           ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
1591                                  vlib_get_thread_index (), af,
1592                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
1593
1594           ipn->ipn_n_probes++;
1595           *wait = 1;
1596         }
1597     }
1598   else
1599     {
1600       /* here we are sure that ttl <= ipndb_age */
1601       *wait = ipndb_age - ttl + 1;
1602       return (IP_NEIGHBOR_AGE_ALIVE);
1603     }
1604
1605   return (IP_NEIGHBOR_AGE_PROBE);
1606 }
1607
1608 typedef enum ip_neighbor_process_event_t_
1609 {
1610   IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
1611 } ip_neighbor_process_event_t;
1612
1613 static uword
1614 ip_neighbor_age_loop (vlib_main_t * vm,
1615                       vlib_node_runtime_t * rt,
1616                       vlib_frame_t * f, ip_address_family_t af)
1617 {
1618   uword event_type, *event_data = NULL;
1619   f64 timeout;
1620
1621   /* Set the timeout to an effectively infinite value when the process starts */
1622   timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;
1623
1624   while (1)
1625     {
1626       f64 now;
1627
1628       if (!timeout)
1629         vlib_process_wait_for_event (vm);
1630       else
1631         vlib_process_wait_for_event_or_clock (vm, timeout);
1632
1633       event_type = vlib_process_get_events (vm, &event_data);
1634       vec_reset_length (event_data);
1635
1636       now = vlib_time_now (vm);
1637
1638       switch (event_type)
1639         {
1640         case ~0:
1641           {
1642             /* timer expired */
1643             ip_neighbor_elt_t *elt, *head;
1644             f64 wait;
1645
1646             timeout = ip_neighbor_db[af].ipndb_age;
1647             head = pool_elt_at_index (ip_neighbor_elt_pool,
1648                                       ip_neighbor_list_head[af]);
1649
1650           /* the list is time sorted, newest first, so start from the back
1651            * and work forwards. Stop when we get to one that is alive */
1652           restart:
1653           clib_llist_foreach_reverse(ip_neighbor_elt_pool,
1654                                      ipne_anchor, head, elt,
1655           ({
1656             ip_neighbor_age_state_t res;
1657
1658             res = ip_neighbour_age_out(elt->ipne_index, now, &wait);
1659
1660             if (IP_NEIGHBOR_AGE_ALIVE == res) {
1661               /* the oldest neighbor has not yet expired, go back to sleep */
1662               timeout = clib_min (wait, timeout);
1663               break;
1664             }
1665             else if (IP_NEIGHBOR_AGE_DEAD == res) {
1666               /* the oldest neighbor is dead, pop it, then restart the walk
1667                * again from the back */
1668               ip_neighbor_destroy (ip_neighbor_get(elt->ipne_index));
1669               goto restart;
1670             }
1671
1672             timeout = clib_min (wait, timeout);
1673           }));
1674             break;
1675           }
1676         case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
1677           {
1678
1679             if (!ip_neighbor_db[af].ipndb_age)
1680               {
1681                 /* aging has been disabled */
1682                 timeout = 0;
1683                 break;
1684               }
1685             ip_neighbor_elt_t *elt, *head;
1686
1687             head = pool_elt_at_index (ip_neighbor_elt_pool,
1688                                       ip_neighbor_list_head[af]);
1689             /* no neighbors yet */
1690             if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
1691               {
1692                 timeout = ip_neighbor_db[af].ipndb_age;
1693                 break;
1694               }
1695
1696             /* poke the oldset neighbour for aging, which returns how long we sleep for */
1697             elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
1698             ip_neighbour_age_out (elt->ipne_index, now, &timeout);
1699             break;
1700           }
1701         }
1702     }
1703   return 0;
1704 }
1705
1706 static uword
1707 ip4_neighbor_age_process (vlib_main_t * vm,
1708                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1709 {
1710   return (ip_neighbor_age_loop (vm, rt, f, AF_IP4));
1711 }
1712
1713 static uword
1714 ip6_neighbor_age_process (vlib_main_t * vm,
1715                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1716 {
1717   return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
1718 }
1719
1720 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1721   .function = ip4_neighbor_age_process,
1722   .type = VLIB_NODE_TYPE_PROCESS,
1723   .name = "ip4-neighbor-age-process",
1724 };
1725 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1726   .function = ip6_neighbor_age_process,
1727   .type = VLIB_NODE_TYPE_PROCESS,
1728   .name = "ip6-neighbor-age-process",
1729 };
1730
1731 int
1732 ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
1733 {
1734   ip_neighbor_db[af].ipndb_limit = limit;
1735   ip_neighbor_db[af].ipndb_recycle = recycle;
1736   ip_neighbor_db[af].ipndb_age = age;
1737
1738   vlib_process_signal_event (vlib_get_main (),
1739                              (AF_IP4 == af ?
1740                               ip4_neighbor_age_process_node.index :
1741                               ip6_neighbor_age_process_node.index),
1742                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1743
1744   return (0);
1745 }
1746
1747 int
1748 ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age,
1749                         bool *recycle)
1750 {
1751   *limit = ip_neighbor_db[af].ipndb_limit;
1752   *age = ip_neighbor_db[af].ipndb_age;
1753   *recycle = ip_neighbor_db[af].ipndb_recycle;
1754
1755   return (0);
1756 }
1757
1758 static clib_error_t *
1759 ip_neighbor_config_show (vlib_main_t * vm,
1760                          unformat_input_t * input, vlib_cli_command_t * cmd)
1761 {
1762   ip_address_family_t af;
1763
1764   FOR_EACH_IP_ADDRESS_FAMILY(af) {
1765     vlib_cli_output (vm, "%U:", format_ip_address_family, af);
1766     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1767                      ip_neighbor_db[af].ipndb_limit,
1768                      ip_neighbor_db[af].ipndb_age,
1769                      ip_neighbor_db[af].ipndb_recycle);
1770   }
1771
1772   return (NULL);
1773 }
1774
1775 static clib_error_t *
1776 ip_neighbor_config_set (vlib_main_t *vm, unformat_input_t *input,
1777                         vlib_cli_command_t *cmd)
1778 {
1779   unformat_input_t _line_input, *line_input = &_line_input;
1780   clib_error_t *error = NULL;
1781   ip_address_family_t af;
1782   u32 limit, age;
1783   bool recycle;
1784
1785   if (!unformat_user (input, unformat_line_input, line_input))
1786     return 0;
1787
1788   if (!unformat (line_input, "%U", unformat_ip_address_family, &af))
1789     {
1790       error = unformat_parse_error (line_input);
1791       goto done;
1792     }
1793
1794   limit = ip_neighbor_db[af].ipndb_limit;
1795   age = ip_neighbor_db[af].ipndb_age;
1796   recycle = ip_neighbor_db[af].ipndb_recycle;
1797
1798   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1799     {
1800       if (unformat (line_input, "limit %u", &limit))
1801         ;
1802       else if (unformat (line_input, "age %u", &age))
1803         ;
1804       else if (unformat (line_input, "recycle"))
1805         recycle = true;
1806       else if (unformat (line_input, "norecycle"))
1807         recycle = false;
1808       else
1809         {
1810           error = unformat_parse_error (line_input);
1811           goto done;
1812         }
1813     }
1814
1815   ip_neighbor_config (af, limit, age, recycle);
1816
1817 done:
1818   unformat_free (line_input);
1819   return error;
1820 }
1821
1822 static void
1823 ip_neighbor_stats_show_one (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index)
1824 {
1825   vlib_cli_output (vm, "  %U", format_vnet_sw_if_index_name, vnm, sw_if_index);
1826   vlib_cli_output (vm, "    arp:%U", format_ip_neighbor_counters,
1827                    &ip_neighbor_counters[AF_IP4], sw_if_index);
1828   vlib_cli_output (vm, "    nd: %U", format_ip_neighbor_counters,
1829                    &ip_neighbor_counters[AF_IP6], sw_if_index);
1830 }
1831
1832 static walk_rc_t
1833 ip_neighbor_stats_show_cb (vnet_main_t *vnm, vnet_sw_interface_t *si,
1834                            void *ctx)
1835 {
1836   ip_neighbor_stats_show_one (ctx, vnm, si->sw_if_index);
1837
1838   return (WALK_CONTINUE);
1839 }
1840
1841 static clib_error_t *
1842 ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input,
1843                         vlib_cli_command_t *cmd)
1844 {
1845   vnet_main_t *vnm;
1846   u32 sw_if_index;
1847
1848   vnm = vnet_get_main ();
1849   sw_if_index = ~0;
1850   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1851
1852   if (~0 == sw_if_index)
1853     {
1854       vnet_sw_interface_walk (vnm, ip_neighbor_stats_show_cb, vm);
1855     }
1856   else
1857     {
1858       ip_neighbor_stats_show_one (vm, vnm, sw_if_index);
1859     }
1860   return (NULL);
1861 }
1862
1863 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1864   .path = "show ip neighbor-config",
1865   .function = ip_neighbor_config_show,
1866   .short_help = "show ip neighbor-config",
1867 };
1868 VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
1869   .path = "set ip neighbor-config",
1870   .function = ip_neighbor_config_set,
1871   .short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
1872                 "[recycle|norecycle]",
1873 };
1874 VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = {
1875   .path = "show ip neighbor-stats",
1876   .function = ip_neighbor_stats_show,
1877   .short_help = "show ip neighbor-stats [interface]",
1878 };
1879
1880 static clib_error_t *
1881 ip_neighbor_init (vlib_main_t * vm)
1882 {
1883   {
1884     ip4_add_del_interface_address_callback_t cb = {
1885       .function = ip_neighbor_add_del_interface_address_v4,
1886     };
1887     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1888   }
1889   {
1890     ip6_add_del_interface_address_callback_t cb = {
1891       .function = ip_neighbor_add_del_interface_address_v6,
1892     };
1893     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1894   }
1895   {
1896     ip4_table_bind_callback_t cb = {
1897       .function = ip_neighbor_table_bind_v4,
1898     };
1899     vec_add1 (ip4_main.table_bind_callbacks, cb);
1900   }
1901   {
1902     ip6_table_bind_callback_t cb = {
1903       .function = ip_neighbor_table_bind_v6,
1904     };
1905     vec_add1 (ip6_main.table_bind_callbacks, cb);
1906   }
1907   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1908
1909   ip_address_family_t af;
1910
1911   FOR_EACH_IP_ADDRESS_FAMILY (af)
1912     ip_neighbor_list_head[af] =
1913     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1914
1915   return (NULL);
1916 }
1917
1918 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1919 {
1920   .runs_after = VLIB_INITS("ip_main_init"),
1921 };
1922
1923 /*
1924  * fd.io coding-style-patch-verification: ON
1925  *
1926  * Local Variables:
1927  * eval: (c-set-style "gnu")
1928  * End:
1929  */