ip-neighbor: Allow to replace dynamic entry
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
 * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/fib/fib_table.h>
27 #include <vnet/adj/adj_mcast.h>
28
/**
 * Pool holding every IP neighbor, whatever its address family.
 * Neighbors are referred to elsewhere by their index in this pool
 * (see ip_neighbor_get() / ip_neighbor_get_index()).
 */
static ip_neighbor_t *ip_neighbor_pool;

/**
 * Per address-family index, into ip_neighbor_elt_pool, of the element that
 * heads the time sorted list of neighbors. Refreshed neighbors are
 * (re-)inserted at the head, so the oldest entry is found just before the
 * head.
 */
index_t ip_neighbor_list_head[IP46_N_TYPES];
34
/**
 * One element of the time sorted neighbor list.
 */
typedef struct ip_neighbor_elt_t_
{
  /** linkage into the list (used with the clib_llist_* macros) */
  clib_llist_anchor_t ipne_anchor;
  /** index, in ip_neighbor_pool, of the neighbor this element stands for */
  index_t ipne_index;
} ip_neighbor_elt_t;
40
/** Pool of linked list elements (list heads and per-neighbor elements) */
ip_neighbor_elt_t *ip_neighbor_elt_pool;
43
/**
 * Per address-family neighbor database: the lookup tables plus the
 * configuration and counters that govern how many neighbors may exist.
 */
typedef struct ip_neighbor_db_t_
{
  /** per interface hash: a vector indexed by sw_if_index, each slot a hash
   * from ip_neighbor_key_t to the neighbor's pool index. Slots are created
   * lazily when the first neighbor is added on an interface */
  uword **ipndb_hash;
  /** per-protocol limit - max number of neighbors. 0 disables the check
   * made when a neighbor is allocated */
  u32 ipndb_limit;
  /** max age of a neighbor before it's forcibly evicted */
  u32 ipndb_age;
  /** when the limit is reached and new neighbors are created, should
   * we recycle an old one */
  bool ipndb_recycle;
  /** per-protocol number of elements */
  u32 ipndb_n_elts;
  /** per-protocol number of elements per-fib-index; used to lock a FIB
   * table on its first adj-fib and unlock it when the last one goes */
  u32 *ipndb_n_elts_per_fib;
} ip_neighbor_db_t;
60
/** Log class handed to vlib_log_* by the IP_NEIGHBOR_DBG/INFO macros */
static vlib_log_class_t ipn_logger;
62
/*
 * DBs of neighbours, one per address family.
 * Both start with the same defaults: at most 50000 neighbors, no aging and
 * no recycling of old entries once the limit is hit.
 */
/* *INDENT-OFF* */
static ip_neighbor_db_t ip_neighbor_db[IP46_N_TYPES] = {
  [IP46_TYPE_IP4] = {
    .ipndb_limit = 50000,
    /* Default to not aging and not recycling */
    .ipndb_age = 0,
    .ipndb_recycle = false,
  },
  [IP46_TYPE_IP6] = {
    .ipndb_limit = 50000,
    /* Default to not aging and not recycling */
    .ipndb_age = 0,
    .ipndb_recycle = false,
  }
};
/* *INDENT-ON* */
80
/*
 * Logging helpers bound to this module's log class.
 *
 * The expansions deliberately carry no trailing semicolon: the caller
 * supplies it, so the macros behave like ordinary function calls and can
 * be used as the sole statement of an un-braced if/else.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
86
87 ip_neighbor_t *
88 ip_neighbor_get (index_t ipni)
89 {
90   if (pool_is_free_index (ip_neighbor_pool, ipni))
91     return (NULL);
92
93   return (pool_elt_at_index (ip_neighbor_pool, ipni));
94 }
95
96 static index_t
97 ip_neighbor_get_index (const ip_neighbor_t * ipn)
98 {
99   return (ipn - ip_neighbor_pool);
100 }
101
102 static void
103 ip_neighbor_touch (ip_neighbor_t * ipn)
104 {
105   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
106 }
107
108 static bool
109 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
110 {
111   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
112 }
113
114 const ip46_address_t *
115 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
116 {
117   return (&ipn->ipn_key->ipnk_ip);
118 }
119
120 const mac_address_t *
121 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
122 {
123   return (&ipn->ipn_mac);
124 }
125
126 const u32
127 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
128 {
129   return (ipn->ipn_key->ipnk_sw_if_index);
130 }
131
132 static void
133 ip_neighbor_list_remove (ip_neighbor_t * ipn)
134 {
135   /* new neighbours, are added to the head of the list, since the
136    * list is time sorted, newest first */
137   ip_neighbor_elt_t *elt;
138
139   if (~0 != ipn->ipn_elt)
140     {
141       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
142
143       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
144
145       ipn->ipn_elt = ~0;
146     }
147 }
148
149 static void
150 ip_neighbor_refresh (ip_neighbor_t * ipn)
151 {
152   /* new neighbours, are added to the head of the list, since the
153    * list is time sorted, newest first */
154   ip_neighbor_elt_t *elt, *head;
155
156   ip_neighbor_touch (ipn);
157   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
158   ipn->ipn_n_probes = 0;
159
160   if (ip_neighbor_is_dynamic (ipn))
161     {
162       if (~0 == ipn->ipn_elt)
163         /* first time insertion */
164         pool_get_zero (ip_neighbor_elt_pool, elt);
165       else
166         {
167           /* already inserted - extract first */
168           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
169
170           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
171         }
172       head = pool_elt_at_index (ip_neighbor_elt_pool,
173                                 ip_neighbor_list_head[ipn->
174                                                       ipn_key->ipnk_type]);
175
176       elt->ipne_index = ip_neighbor_get_index (ipn);
177       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
178       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
179     }
180 }
181
182 static void
183 ip_neighbor_db_add (const ip_neighbor_t * ipn)
184 {
185   vec_validate (ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash,
186                 ipn->ipn_key->ipnk_sw_if_index);
187
188   if (!ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash
189       [ipn->ipn_key->ipnk_sw_if_index])
190     ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash[ipn->
191                                                        ipn_key->ipnk_sw_if_index]
192       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
193
194   hash_set_mem (ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash
195                 [ipn->ipn_key->ipnk_sw_if_index], ipn->ipn_key,
196                 ip_neighbor_get_index (ipn));
197
198   ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_n_elts++;
199 }
200
201 static void
202 ip_neighbor_db_remove (const ip_neighbor_key_t * key)
203 {
204   vec_validate (ip_neighbor_db[key->ipnk_type].ipndb_hash,
205                 key->ipnk_sw_if_index);
206
207   hash_unset_mem (ip_neighbor_db[key->ipnk_type].ipndb_hash
208                   [key->ipnk_sw_if_index], key);
209
210   ip_neighbor_db[key->ipnk_type].ipndb_n_elts--;
211 }
212
213 static ip_neighbor_t *
214 ip_neighbor_db_find (const ip_neighbor_key_t * key)
215 {
216   uword *p;
217
218   if (key->ipnk_sw_if_index >=
219       vec_len (ip_neighbor_db[key->ipnk_type].ipndb_hash))
220     return NULL;
221
222   p =
223     hash_get_mem (ip_neighbor_db[key->ipnk_type].ipndb_hash
224                   [key->ipnk_sw_if_index], key);
225
226   if (p)
227     return ip_neighbor_get (p[0]);
228
229   return (NULL);
230 }
231
232 static u8
233 ip46_type_pfx_len (ip46_type_t type)
234 {
235   return (type == IP46_TYPE_IP4 ? 32 : 128);
236 }
237
238 static void
239 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
240 {
241   if (ipn->ipn_key->ipnk_type == IP46_TYPE_IP6 &&
242       ip6_address_is_link_local_unicast (&ipn->ipn_key->ipnk_ip.ip6))
243     {
244       ip6_ll_prefix_t pfx = {
245         .ilp_addr = ipn->ipn_key->ipnk_ip.ip6,
246         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
247       };
248       ipn->ipn_fib_entry_index =
249         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
250     }
251   else
252     {
253       fib_protocol_t fproto;
254
255       fproto = fib_proto_from_ip46 (ipn->ipn_key->ipnk_type);
256
257       fib_prefix_t pfx = {
258         .fp_len = ip46_type_pfx_len (ipn->ipn_key->ipnk_type),
259         .fp_proto = fproto,
260         .fp_addr = ipn->ipn_key->ipnk_ip,
261       };
262
263       ipn->ipn_fib_entry_index =
264         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
265                                   FIB_ENTRY_FLAG_ATTACHED,
266                                   fib_proto_to_dpo (fproto),
267                                   &pfx.fp_addr,
268                                   ipn->ipn_key->ipnk_sw_if_index,
269                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
270
271       vec_validate (ip_neighbor_db
272                     [ipn->ipn_key->ipnk_type].ipndb_n_elts_per_fib,
273                     fib_index);
274
275       ip_neighbor_db[ipn->ipn_key->
276                      ipnk_type].ipndb_n_elts_per_fib[fib_index]++;
277
278       if (1 ==
279           ip_neighbor_db[ipn->ipn_key->
280                          ipnk_type].ipndb_n_elts_per_fib[fib_index])
281         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
282     }
283 }
284
285 static void
286 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
287 {
288   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
289     {
290       if (ipn->ipn_key->ipnk_type == IP46_TYPE_IP6 &&
291           ip6_address_is_link_local_unicast (&ipn->ipn_key->ipnk_ip.ip6))
292         {
293           ip6_ll_prefix_t pfx = {
294             .ilp_addr = ipn->ipn_key->ipnk_ip.ip6,
295             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
296           };
297           ip6_ll_table_entry_delete (&pfx);
298         }
299       else
300         {
301           fib_protocol_t fproto;
302
303           fproto = fib_proto_from_ip46 (ipn->ipn_key->ipnk_type);
304
305           fib_prefix_t pfx = {
306             .fp_len = ip46_type_pfx_len (ipn->ipn_key->ipnk_type),
307             .fp_proto = fproto,
308             .fp_addr = ipn->ipn_key->ipnk_ip,
309           };
310
311           fib_table_entry_path_remove (fib_index,
312                                        &pfx,
313                                        FIB_SOURCE_ADJ,
314                                        fib_proto_to_dpo (fproto),
315                                        &pfx.fp_addr,
316                                        ipn->ipn_key->ipnk_sw_if_index,
317                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
318
319           ip_neighbor_db[ipn->ipn_key->
320                          ipnk_type].ipndb_n_elts_per_fib[fib_index]--;
321
322           if (0 ==
323               ip_neighbor_db[ipn->ipn_key->
324                              ipnk_type].ipndb_n_elts_per_fib[fib_index])
325             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
326         }
327     }
328 }
329
330 static void
331 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
332 {
333   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
334                           ethernet_build_rewrite (vnet_get_main (),
335                                                   ipn->
336                                                   ipn_key->ipnk_sw_if_index,
337                                                   adj_get_link_type (ai),
338                                                   ipn->ipn_mac.bytes));
339 }
340
341 static void
342 ip_neighbor_mk_incomplete (adj_index_t ai)
343 {
344   ip_adjacency_t *adj = adj_get (ai);
345
346   adj_nbr_update_rewrite (ai,
347                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
348                           ethernet_build_rewrite (vnet_get_main (),
349                                                   adj->
350                                                   rewrite_header.sw_if_index,
351                                                   VNET_LINK_ARP,
352                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
353 }
354
355 static adj_walk_rc_t
356 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
357 {
358   ip_neighbor_t *ipn = ctx;
359
360   ip_neighbor_mk_complete (ai, ipn);
361
362   return (ADJ_WALK_RC_CONTINUE);
363 }
364
365 static adj_walk_rc_t
366 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
367 {
368   ip_neighbor_mk_incomplete (ai);
369
370   return (ADJ_WALK_RC_CONTINUE);
371 }
372
373 static void
374 ip_neighbor_free (ip_neighbor_t * ipn)
375 {
376   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
377                    ip_neighbor_get_index (ipn));
378
379   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
380                    fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
381                    &ipn->ipn_key->ipnk_ip,
382                    ip_neighbor_mk_incomplete_walk, ipn);
383   ip_neighbor_adj_fib_remove
384     (ipn,
385      fib_table_get_index_for_sw_if_index
386      (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
387       ipn->ipn_key->ipnk_sw_if_index));
388
389   ip_neighbor_list_remove (ipn);
390   ip_neighbor_db_remove (ipn->ipn_key);
391   clib_mem_free (ipn->ipn_key);
392
393   pool_put (ip_neighbor_pool, ipn);
394 }
395
396 static bool
397 ip_neighbor_force_reuse (ip46_type_t type)
398 {
399   if (!ip_neighbor_db[type].ipndb_recycle)
400     return false;
401
402   /* pluck the oldest entry, which is the one from the end of the list */
403   ip_neighbor_elt_t *elt, *head;
404
405   head =
406     pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[type]);
407
408   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
409     return (false);
410
411   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
412   ip_neighbor_free (ip_neighbor_get (elt->ipne_index));
413
414   return (true);
415 }
416
417 static ip_neighbor_t *
418 ip_neighbor_alloc (const ip_neighbor_key_t * key,
419                    const mac_address_t * mac, ip_neighbor_flags_t flags)
420 {
421   ip_neighbor_t *ipn;
422
423   if (ip_neighbor_db[key->ipnk_type].ipndb_limit &&
424       (ip_neighbor_db[key->ipnk_type].ipndb_n_elts >=
425        ip_neighbor_db[key->ipnk_type].ipndb_limit))
426     {
427       if (!ip_neighbor_force_reuse (key->ipnk_type))
428         return (NULL);
429     }
430
431   pool_get_zero (ip_neighbor_pool, ipn);
432
433   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
434   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
435
436   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
437   ipn->ipn_flags = flags;
438   ipn->ipn_elt = ~0;
439
440   mac_address_copy (&ipn->ipn_mac, mac);
441
442   ip_neighbor_db_add (ipn);
443
444   /* create the adj-fib. the entry in the FIB table for the peer's interface */
445   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
446     ip_neighbor_adj_fib_add
447       (ipn, fib_table_get_index_for_sw_if_index
448        (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
449         ipn->ipn_key->ipnk_sw_if_index));
450
451   return (ipn);
452 }
453
454 int
455 ip_neighbor_add (const ip46_address_t * ip,
456                  ip46_type_t type,
457                  const mac_address_t * mac,
458                  u32 sw_if_index,
459                  ip_neighbor_flags_t flags, u32 * stats_index)
460 {
461   fib_protocol_t fproto;
462   ip_neighbor_t *ipn;
463
464   /* main thread only */
465   ASSERT (0 == vlib_get_thread_index ());
466
467   fproto = fib_proto_from_ip46 (type);
468
469   const ip_neighbor_key_t key = {
470     .ipnk_ip = *ip,
471     .ipnk_sw_if_index = sw_if_index,
472     .ipnk_type = type,
473   };
474
475   ipn = ip_neighbor_db_find (&key);
476
477   if (ipn)
478     {
479       IP_NEIGHBOR_DBG ("update: %U, %U",
480                        format_vnet_sw_if_index_name, vnet_get_main (),
481                        sw_if_index, format_ip46_address, ip, type,
482                        format_ip_neighbor_flags, flags, format_mac_address_t,
483                        mac);
484
485       ip_neighbor_touch (ipn);
486
487       /* Refuse to over-write static neighbor entry. */
488       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
489           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
490         {
491           /* if MAC address match, still check to send event */
492           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
493             goto check_customers;
494           return -2;
495         }
496
497       /* A dynamic entry can become static, but not vice-versa.
498        * i.e. since if it was programmed by the CP then it must
499        * be removed by the CP */
500       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
501           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
502         {
503           ip_neighbor_list_remove (ipn);
504           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
505           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
506         }
507
508       /*
509        * prevent a DoS attack from the data-plane that
510        * spams us with no-op updates to the MAC address
511        */
512       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
513         {
514           ip_neighbor_refresh (ipn);
515           goto check_customers;
516         }
517
518       mac_address_copy (&ipn->ipn_mac, mac);
519     }
520   else
521     {
522       IP_NEIGHBOR_INFO ("add: %U, %U",
523                         format_vnet_sw_if_index_name, vnet_get_main (),
524                         sw_if_index, format_ip46_address, ip, type,
525                         format_ip_neighbor_flags, flags, format_mac_address_t,
526                         mac);
527
528       ipn = ip_neighbor_alloc (&key, mac, flags);
529
530       if (NULL == ipn)
531         return VNET_API_ERROR_LIMIT_EXCEEDED;
532     }
533
534   /* Update time stamp and flags. */
535   ip_neighbor_refresh (ipn);
536
537   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
538                    fproto, &ipn->ipn_key->ipnk_ip,
539                    ip_neighbor_mk_complete_walk, ipn);
540
541 check_customers:
542   /* Customer(s) requesting event for this address? */
543   ip_neighbor_publish (ip_neighbor_get_index (ipn));
544
545   if (stats_index)
546     *stats_index = adj_nbr_find (fproto,
547                                  fib_proto_to_link (fproto),
548                                  &ipn->ipn_key->ipnk_ip,
549                                  ipn->ipn_key->ipnk_sw_if_index);
550   return 0;
551 }
552
553 int
554 ip_neighbor_del (const ip46_address_t * ip, ip46_type_t type, u32 sw_if_index)
555 {
556   ip_neighbor_t *ipn;
557
558   /* main thread only */
559   ASSERT (0 == vlib_get_thread_index ());
560
561   IP_NEIGHBOR_INFO ("delete: %U, %U",
562                     format_vnet_sw_if_index_name, vnet_get_main (),
563                     sw_if_index, format_ip46_address, ip, type);
564
565   const ip_neighbor_key_t key = {
566     .ipnk_ip = *ip,
567     .ipnk_sw_if_index = sw_if_index,
568     .ipnk_type = type,
569   };
570
571   ipn = ip_neighbor_db_find (&key);
572
573   if (NULL == ipn)
574     return (VNET_API_ERROR_NO_SUCH_ENTRY);
575
576   ip_neighbor_free (ipn);
577
578   return (0);
579 }
580
/**
 * Context for the walk made by ip_neighbor_del_all().
 */
typedef struct ip_neighbor_del_all_ctx_t_
{
  /** vector of the pool indices of the neighbors visited by the walk */
  index_t *ipn_del;
} ip_neighbor_del_all_ctx_t;
585
586 static walk_rc_t
587 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
588 {
589   ip_neighbor_del_all_ctx_t *ctx = arg;
590
591   vec_add1 (ctx->ipn_del, ipni);
592
593   return (WALK_CONTINUE);
594 }
595
596 void
597 ip_neighbor_del_all (ip46_type_t type, u32 sw_if_index)
598 {
599   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
600                     format_ip46_type, type,
601                     format_vnet_sw_if_index_name, vnet_get_main (),
602                     sw_if_index);
603
604   ip_neighbor_del_all_ctx_t ctx = {
605     .ipn_del = NULL,
606   };
607   index_t *ipni;
608
609   ip_neighbor_walk (type, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
610
611   vec_foreach (ipni, ctx.ipn_del) ip_neighbor_free (ip_neighbor_get (*ipni));
612   vec_free (ctx.ipn_del);
613 }
614
/**
 * Bring an adjacency in line with the neighbor DB.
 *
 * The neighbor is looked up from the adjacency's next-hop, protocol and
 * interface. Only adjacencies whose lookup_next_index is
 * IP_LOOKUP_NEXT_ARP are expected here (any other value asserts):
 *  - with a matching neighbor, all adjacencies for that next-hop are
 *    completed from the neighbor's MAC;
 *  - without one, the adjacency is given an incomplete rewrite and a single
 *    probe is sent for it.
 *
 * @param vnm the vnet main, used to build the rewrite
 * @param ai  index of the adjacency to update
 */
void
ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
{
  ip_neighbor_t *ipn;
  ip_adjacency_t *adj;

  adj = adj_get (ai);

  ip_neighbor_key_t key = {
    .ipnk_ip = adj->sub_type.nbr.next_hop,
    .ipnk_type = fib_proto_to_ip46 (adj->ia_nh_proto),
    .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
  };
  ipn = ip_neighbor_db_find (&key);

  switch (adj->lookup_next_index)
    {
    case IP_LOOKUP_NEXT_ARP:
      if (NULL != ipn)
        {
          adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
                           adj->ia_nh_proto,
                           &ipn->ipn_key->ipnk_ip,
                           ip_neighbor_mk_complete_walk, ipn);
        }
      else
        {
          /*
           * no matching ARP entry.
           * construct the rewrite required for an ARP packet, and stick
           * that in the adj's pipe to smoke.
           */
          adj_nbr_update_rewrite
            (ai,
             ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
             ethernet_build_rewrite
             (vnm,
              adj->rewrite_header.sw_if_index,
              VNET_LINK_ARP,
              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));

          /*
           * since the FIB has added this adj for a route, it makes sense it
           * may want to forward traffic sometime soon. Let's send a
           * speculative ARP. just one. If we were to do periodically that
           * wouldn't be bad either, but that's more code than i'm prepared to
           * write at this time for relatively little reward.
           */
          /*
           * adj_nbr_update_rewrite may actually call fib_walk_sync.
           * fib_walk_sync may allocate a new adjacency and potentially cause
           * a realloc for adj_pool. When that happens, adj pointer is no
           * longer valid here. We refresh adj pointer accordingly.
           */
          adj = adj_get (ai);
          ip_neighbor_probe (adj);
        }
      break;
    /* no other adjacency type is expected to be passed in */
    case IP_LOOKUP_NEXT_GLEAN:
    case IP_LOOKUP_NEXT_BCAST:
    case IP_LOOKUP_NEXT_MCAST:
    case IP_LOOKUP_NEXT_DROP:
    case IP_LOOKUP_NEXT_PUNT:
    case IP_LOOKUP_NEXT_LOCAL:
    case IP_LOOKUP_NEXT_REWRITE:
    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
    case IP_LOOKUP_NEXT_MIDCHAIN:
    case IP_LOOKUP_NEXT_ICMP_ERROR:
    case IP_LOOKUP_N_NEXT:
      ASSERT (0);
      break;
    }
}
688
689 void
690 ip_neighbor_learn (const ip_neighbor_learn_t * l)
691 {
692   ip_neighbor_add (&l->ip, l->type, &l->mac, l->sw_if_index,
693                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
694 }
695
696 static clib_error_t *
697 ip_neighbor_cmd (vlib_main_t * vm,
698                  unformat_input_t * input, vlib_cli_command_t * cmd)
699 {
700   ip46_address_t ip = ip46_address_initializer;
701   mac_address_t mac = ZERO_MAC_ADDRESS;
702   vnet_main_t *vnm = vnet_get_main ();
703   ip_neighbor_flags_t flags;
704   u32 sw_if_index = ~0;
705   int is_add = 1;
706   int count = 1;
707
708   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
709
710   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
711     {
712       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
713       if (unformat (input, "%U %U %U",
714                     unformat_vnet_sw_interface, vnm, &sw_if_index,
715                     unformat_ip46_address, &ip, IP46_TYPE_ANY,
716                     unformat_mac_address_t, &mac))
717         ;
718       else if (unformat (input, "delete") || unformat (input, "del"))
719         is_add = 0;
720       else if (unformat (input, "static"))
721         {
722           flags |= IP_NEIGHBOR_FLAG_STATIC;
723           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
724         }
725       else if (unformat (input, "no-fib-entry"))
726         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
727       else if (unformat (input, "count %d", &count))
728         ;
729       else
730         break;
731     }
732
733   if (sw_if_index == ~0 ||
734       ip46_address_is_zero (&ip) || mac_address_is_zero (&mac))
735     return clib_error_return (0,
736                               "specify interface, IP address and MAC: `%U'",
737                               format_unformat_error, input);
738
739   while (count)
740     {
741       if (is_add)
742         ip_neighbor_add (&ip, ip46_address_get_type (&ip), &mac, sw_if_index,
743                          flags, NULL);
744       else
745         ip_neighbor_del (&ip, ip46_address_get_type (&ip), sw_if_index);
746
747       ip46_address_increment (ip46_address_get_type (&ip), &ip);
748       mac_address_increment (&mac);
749
750       --count;
751     }
752
753   return NULL;
754 }
755
756 /* *INDENT-OFF* */
757 /*?
758  * Add or delete IPv4 ARP cache entries.
759  *
760  * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
761  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
762  * any order and combination.
763  *
764  * @cliexpar
765  * @parblock
766  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
767  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
768  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
769  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
770  *
771  * To add or delete an IPv4 ARP cache entry to or from a specific fib
772  * table:
773  * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
774  * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
775  *
776  * Add or delete IPv4 static ARP cache entries as follows:
777  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
778  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
779  *
780  * For testing / debugging purposes, the 'set ip neighbor' command can add or
781  * delete multiple entries. Supply the 'count N' parameter:
782  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
783  * @endparblock
784  ?*/
785 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
786   .path = "set ip neighbor",
787   .short_help =
788   "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
789   .function = ip_neighbor_cmd,
790 };
791 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
792   .path = "ip neighbor",
793   .short_help =
794   "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
795   .function = ip_neighbor_cmd,
796 };
797 /* *INDENT-ON* */
798
799 static int
800 ip_neighbor_sort (void *a1, void *a2)
801 {
802   index_t *ipni1 = a1, *ipni2 = a2;
803   ip_neighbor_t *ipn1, *ipn2;
804   int cmp;
805
806   ipn1 = ip_neighbor_get (*ipni1);
807   ipn2 = ip_neighbor_get (*ipni2);
808
809   cmp = vnet_sw_interface_compare (vnet_get_main (),
810                                    ipn1->ipn_key->ipnk_sw_if_index,
811                                    ipn2->ipn_key->ipnk_sw_if_index);
812   if (!cmp)
813     cmp = ip46_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
814   return cmp;
815 }
816
817 static index_t *
818 ip_neighbor_entries (u32 sw_if_index, ip46_type_t type)
819 {
820   index_t *ipnis = NULL;
821   ip_neighbor_t *ipn;
822
823   /* *INDENT-OFF* */
824   pool_foreach (ipn, ip_neighbor_pool,
825   ({
826     if ((sw_if_index == ~0 ||
827         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
828         (IP46_TYPE_ANY == type ||
829          ipn->ipn_key->ipnk_type == type))
830        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
831   }));
832
833   /* *INDENT-ON* */
834
835   if (ipnis)
836     vec_sort_with_function (ipnis, ip_neighbor_sort);
837   return ipnis;
838 }
839
840 static clib_error_t *
841 ip_neighbor_show_sorted_i (vlib_main_t * vm,
842                            unformat_input_t * input,
843                            vlib_cli_command_t * cmd, ip46_type_t type)
844 {
845   ip_neighbor_elt_t *elt, *head;
846
847   head = pool_elt_at_index (ip_neighbor_elt_pool,
848                             ip_neighbor_list_head[type]);
849
850
851   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
852                    "Flags", "Ethernet", "Interface");
853
854   /* *INDENT-OFF*/
855   /* the list is time sorted, newest first, so start from the back
856    * and work forwards. Stop when we get to one that is alive */
857   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
858                              ipne_anchor, head, elt,
859   ({
860     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
861   }));
862   /* *INDENT-ON*/
863
864   return (NULL);
865 }
866
867 static clib_error_t *
868 ip_neighbor_show_i (vlib_main_t * vm,
869                     unformat_input_t * input,
870                     vlib_cli_command_t * cmd, ip46_type_t type)
871 {
872   index_t *ipni, *ipnis = NULL;
873   u32 sw_if_index;
874
875   /* Filter entries by interface if given. */
876   sw_if_index = ~0;
877   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
878                         &sw_if_index);
879
880   ipnis = ip_neighbor_entries (sw_if_index, type);
881
882   if (ipnis)
883     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
884                      "Flags", "Ethernet", "Interface");
885
886   vec_foreach (ipni, ipnis)
887   {
888     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
889   }
890   vec_free (ipnis);
891
892   return (NULL);
893 }
894
895 static clib_error_t *
896 ip_neighbor_show (vlib_main_t * vm,
897                   unformat_input_t * input, vlib_cli_command_t * cmd)
898 {
899   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_ANY));
900 }
901
902 static clib_error_t *
903 ip6_neighbor_show (vlib_main_t * vm,
904                    unformat_input_t * input, vlib_cli_command_t * cmd)
905 {
906   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_IP6));
907 }
908
909 static clib_error_t *
910 ip4_neighbor_show (vlib_main_t * vm,
911                    unformat_input_t * input, vlib_cli_command_t * cmd)
912 {
913   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_IP4));
914 }
915
916 static clib_error_t *
917 ip6_neighbor_show_sorted (vlib_main_t * vm,
918                           unformat_input_t * input, vlib_cli_command_t * cmd)
919 {
920   return (ip_neighbor_show_sorted_i (vm, input, cmd, IP46_TYPE_IP6));
921 }
922
923 static clib_error_t *
924 ip4_neighbor_show_sorted (vlib_main_t * vm,
925                           unformat_input_t * input, vlib_cli_command_t * cmd)
926 {
927   return (ip_neighbor_show_sorted_i (vm, input, cmd, IP46_TYPE_IP4));
928 }
929
/*?
 * Display all the IP neighbor entries.
 *
 * The 'show ip[4|6] neighbor[s]' commands take an optional interface to
 * filter on; the 'neighbor-sorted' commands print the time sorted list of
 * one address family.
 *
 * NOTE(review): the sample output below does not match the header printed
 * by the handlers in this file, which is: Time, IP, Flags, Ethernet,
 * Interface (no FIB column, and no proxy ARP section is printed here).
 * It presumably predates this code - confirm and refresh.
 *
 * @cliexpar
 * Example of how to display the IPv4 ARP table:
 * @cliexstart{show ip neighbor}
 *    Time      FIB        IP4       Flags      Ethernet              Interface
 *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
 *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
 *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
 * Proxy arps enabled for:
 * Fib_index 0   6.0.0.1 - 6.0.0.11
 * @cliexend
 ?*/
/* *INDENT-OFF* */
/* plural spellings: 'neighbors' */
VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
  .path = "show ip neighbors",
  .function = ip_neighbor_show,
  .short_help = "show ip neighbors [interface]",
};
VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
  .path = "show ip4 neighbors",
  .function = ip4_neighbor_show,
  .short_help = "show ip4 neighbors [interface]",
};
VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
  .path = "show ip6 neighbors",
  .function = ip6_neighbor_show,
  .short_help = "show ip6 neighbors [interface]",
};
/* singular spellings: 'neighbor', same handlers as above */
VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
  .path = "show ip neighbor",
  .function = ip_neighbor_show,
  .short_help = "show ip neighbor [interface]",
};
VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
  .path = "show ip4 neighbor",
  .function = ip4_neighbor_show,
  .short_help = "show ip4 neighbor [interface]",
};
VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
  .path = "show ip6 neighbor",
  .function = ip6_neighbor_show,
  .short_help = "show ip6 neighbor [interface]",
};
/* per-family dump of the time sorted list */
VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
  .path = "show ip4 neighbor-sorted",
  .function = ip4_neighbor_show_sorted,
  .short_help = "show ip4 neighbor-sorted",
};
VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
  .path = "show ip6 neighbor-sorted",
  .function = ip6_neighbor_show_sorted,
  .short_help = "show ip6 neighbor-sorted",
};
/* *INDENT-ON* */
986
987 static ip_neighbor_vft_t ip_nbr_vfts[IP46_N_TYPES];
988
989 void
990 ip_neighbor_register (ip46_type_t type, const ip_neighbor_vft_t * vft)
991 {
992   ip_nbr_vfts[type] = *vft;
993 }
994
995 void
996 ip_neighbor_probe_dst (const ip_adjacency_t * adj, const ip46_address_t * dst)
997 {
998   if (!vnet_sw_interface_is_admin_up (vnet_get_main (),
999                                       adj->rewrite_header.sw_if_index))
1000     return;
1001
1002   switch (adj->ia_nh_proto)
1003     {
1004     case FIB_PROTOCOL_IP6:
1005       ip6_neighbor_probe_dst (adj, &dst->ip6);
1006       break;
1007     case FIB_PROTOCOL_IP4:
1008       ip4_neighbor_probe_dst (adj, &dst->ip4);
1009       break;
1010     case FIB_PROTOCOL_MPLS:
1011       ASSERT (0);
1012       break;
1013     }
1014 }
1015
1016 void
1017 ip_neighbor_probe (const ip_adjacency_t * adj)
1018 {
1019   ip_neighbor_probe_dst (adj, &adj->sub_type.nbr.next_hop);
1020 }
1021
1022 void
1023 ip_neighbor_advertise (vlib_main_t * vm,
1024                        ip46_type_t type,
1025                        const ip46_address_t * addr, u32 sw_if_index)
1026 {
1027   vnet_main_t *vnm = vnet_get_main ();
1028
1029   if (type == IP46_TYPE_IP4 || type == IP46_TYPE_BOTH)
1030     ip4_neighbor_advertise (vm, vnm, sw_if_index, (addr) ? &addr->ip4 : NULL);
1031   if (type == IP46_TYPE_IP6 || type == IP46_TYPE_BOTH)
1032     ip6_neighbor_advertise (vm, vnm, sw_if_index, (addr) ? &addr->ip6 : NULL);
1033 }
1034
1035 void
1036 ip_neighbor_walk (ip46_type_t type,
1037                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1038 {
1039   ip_neighbor_key_t *key;
1040   index_t ipni;
1041
1042   if (~0 == sw_if_index)
1043     {
1044       uword **hash;
1045
1046       vec_foreach (hash, ip_neighbor_db[type].ipndb_hash)
1047       {
1048           /* *INDENT-OFF* */
1049           hash_foreach (key, ipni, *hash,
1050           ({
1051             if (WALK_STOP == cb (ipni, ctx))
1052               break;
1053           }));
1054           /* *INDENT-ON* */
1055       }
1056     }
1057   else
1058     {
1059       uword *hash;
1060
1061       if (vec_len (ip_neighbor_db[type].ipndb_hash) <= sw_if_index)
1062         return;
1063       hash = ip_neighbor_db[type].ipndb_hash[sw_if_index];
1064
1065       /* *INDENT-OFF* */
1066       hash_foreach (key, ipni, hash,
1067       ({
1068         if (WALK_STOP == cb (ipni, ctx))
1069           break;
1070       }));
1071       /* *INDENT-ON* */
1072     }
1073 }
1074
1075 int
1076 ip4_neighbor_proxy_add (u32 fib_index,
1077                         const ip4_address_t * start,
1078                         const ip4_address_t * end)
1079 {
1080   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_add)
1081     {
1082       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_add
1083               (fib_index, start, end));
1084     }
1085
1086   return (-1);
1087 }
1088
1089 int
1090 ip4_neighbor_proxy_delete (u32 fib_index,
1091                            const ip4_address_t * start,
1092                            const ip4_address_t * end)
1093 {
1094   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_del)
1095     {
1096       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_del
1097               (fib_index, start, end));
1098     }
1099   return -1;
1100 }
1101
1102 int
1103 ip4_neighbor_proxy_enable (u32 sw_if_index)
1104 {
1105   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_enable)
1106     {
1107       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_enable (sw_if_index));
1108     }
1109   return -1;
1110 }
1111
1112 int
1113 ip4_neighbor_proxy_disable (u32 sw_if_index)
1114 {
1115   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_disable)
1116     {
1117       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_disable (sw_if_index));
1118     }
1119   return -1;
1120 }
1121
1122 int
1123 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1124 {
1125   if (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_add)
1126     {
1127       return (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_add (sw_if_index, addr));
1128     }
1129   return -1;
1130 }
1131
1132 int
1133 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1134 {
1135   if (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_del)
1136     {
1137       return (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_del (sw_if_index, addr));
1138     }
1139   return -1;
1140 }
1141
1142 static void
1143 ip_neighbor_ethernet_change_mac (ethernet_main_t * em,
1144                                  u32 sw_if_index, uword opaque)
1145 {
1146   ip_neighbor_t *ipn;
1147   adj_index_t ai;
1148
1149   IP_NEIGHBOR_DBG ("mac-change: %U",
1150                    format_vnet_sw_if_index_name, vnet_get_main (),
1151                    sw_if_index);
1152
1153   /* *INDENT-OFF* */
1154   pool_foreach (ipn, ip_neighbor_pool,
1155   ({
1156     if (ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1157       adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1158                        fib_proto_from_ip46(ipn->ipn_key->ipnk_type),
1159                        &ipn->ipn_key->ipnk_ip,
1160                        ip_neighbor_mk_complete_walk,
1161                        ipn);
1162   }));
1163   /* *INDENT-ON* */
1164
1165   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
1166
1167   if (ADJ_INDEX_INVALID != ai)
1168     adj_glean_update_rewrite (ai);
1169 }
1170
1171 void
1172 ip_neighbor_populate (ip46_type_t type, u32 sw_if_index)
1173 {
1174   index_t *ipnis = NULL, *ipni;
1175   ip_neighbor_t *ipn;
1176
1177   IP_NEIGHBOR_DBG ("populate: %U %U",
1178                    format_vnet_sw_if_index_name, vnet_get_main (),
1179                    sw_if_index, format_ip46_type, type);
1180
1181   /* *INDENT-OFF* */
1182   pool_foreach (ipn, ip_neighbor_pool,
1183   ({
1184     if (ipn->ipn_key->ipnk_type == type &&
1185         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1186       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1187   }));
1188   /* *INDENT-ON* */
1189
1190   vec_foreach (ipni, ipnis)
1191   {
1192     ipn = ip_neighbor_get (*ipni);
1193
1194     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1195                      fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
1196                      &ipn->ipn_key->ipnk_ip,
1197                      ip_neighbor_mk_complete_walk, ipn);
1198   }
1199   vec_free (ipnis);
1200 }
1201
1202 void
1203 ip_neighbor_flush (ip46_type_t type, u32 sw_if_index)
1204 {
1205   index_t *ipnis = NULL, *ipni;
1206   ip_neighbor_t *ipn;
1207
1208   IP_NEIGHBOR_DBG ("flush: %U %U",
1209                    format_vnet_sw_if_index_name, vnet_get_main (),
1210                    sw_if_index, format_ip46_type, type);
1211
1212   /* *INDENT-OFF* */
1213   pool_foreach (ipn, ip_neighbor_pool,
1214   ({
1215     if (ipn->ipn_key->ipnk_type == type &&
1216         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1217         ip_neighbor_is_dynamic (ipn))
1218       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1219   }));
1220   /* *INDENT-ON* */
1221
1222   vec_foreach (ipni, ipnis) ip_neighbor_free (ip_neighbor_get (*ipni));
1223   vec_free (ipnis);
1224 }
1225
1226 static walk_rc_t
1227 ip_neighbor_mark_one (index_t ipni, void *ctx)
1228 {
1229   ip_neighbor_t *ipn;
1230
1231   ipn = ip_neighbor_get (ipni);
1232
1233   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1234
1235   return (WALK_CONTINUE);
1236 }
1237
1238 void
1239 ip_neighbor_mark (ip46_type_t type)
1240 {
1241   ip_neighbor_walk (type, ~0, ip_neighbor_mark_one, NULL);
1242 }
1243
1244 typedef struct ip_neighbor_sweep_ctx_t_
1245 {
1246   index_t *ipnsc_stale;
1247 } ip_neighbor_sweep_ctx_t;
1248
1249 static walk_rc_t
1250 ip_neighbor_sweep_one (index_t ipni, void *arg)
1251 {
1252   ip_neighbor_sweep_ctx_t *ctx = arg;
1253   ip_neighbor_t *ipn;
1254
1255   ipn = ip_neighbor_get (ipni);
1256
1257   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1258     {
1259       vec_add1 (ctx->ipnsc_stale, ipni);
1260     }
1261
1262   return (WALK_CONTINUE);
1263 }
1264
1265 void
1266 ip_neighbor_sweep (ip46_type_t type)
1267 {
1268   ip_neighbor_sweep_ctx_t ctx = { };
1269   index_t *ipni;
1270
1271   ip_neighbor_walk (type, ~0, ip_neighbor_sweep_one, &ctx);
1272
1273   vec_foreach (ipni, ctx.ipnsc_stale)
1274   {
1275     ip_neighbor_free (ip_neighbor_get (*ipni));
1276   }
1277   vec_free (ctx.ipnsc_stale);
1278 }
1279
1280 /*
1281  * Remove any arp entries associated with the specified interface
1282  */
1283 static clib_error_t *
1284 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1285                                     u32 sw_if_index, u32 flags)
1286 {
1287   ip46_type_t type;
1288
1289   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1290                    format_vnet_sw_if_index_name, vnet_get_main (),
1291                    sw_if_index,
1292                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1293
1294   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1295     {
1296       FOREACH_IP46_TYPE (type) ip_neighbor_populate (type, sw_if_index);
1297     }
1298   else
1299     {
1300       /* admin down, flush all neighbours */
1301       FOREACH_IP46_TYPE (type) ip_neighbor_flush (type, sw_if_index);
1302     }
1303
1304   return (NULL);
1305 }
1306
1307 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1308
1309 /*
1310  * Remove any arp entries associated with the specified interface
1311  */
1312 static clib_error_t *
1313 ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
1314                                  u32 sw_if_index, u32 is_add)
1315 {
1316   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1317                    format_vnet_sw_if_index_name, vnet_get_main (),
1318                    sw_if_index, (is_add ? "add" : "del"));
1319
1320   if (!is_add && sw_if_index != ~0)
1321     {
1322       ip46_type_t type;
1323
1324       FOREACH_IP46_TYPE (type) ip_neighbor_flush (type, sw_if_index);
1325     }
1326
1327   return (NULL);
1328 }
1329
1330 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
1331
1332 typedef struct ip_neighbor_walk_covered_ctx_t_
1333 {
1334   ip46_type_t type;
1335   ip46_address_t addr;
1336   u32 length;
1337   index_t *ipnis;
1338 } ip_neighbor_walk_covered_ctx_t;
1339
1340 static walk_rc_t
1341 ip_neighbor_walk_covered (index_t ipni, void *arg)
1342 {
1343   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1344   ip_neighbor_t *ipn;
1345
1346   ipn = ip_neighbor_get (ipni);
1347
1348   ASSERT (ipn->ipn_key->ipnk_type == ctx->type);
1349
1350   if (IP46_TYPE_IP4 == ctx->type)
1351     {
1352       if (ip4_destination_matches_route (&ip4_main,
1353                                          &ipn->ipn_key->ipnk_ip.ip4,
1354                                          &ctx->addr.ip4,
1355                                          ctx->length) &&
1356           ip_neighbor_is_dynamic (ipn))
1357         {
1358           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1359         }
1360     }
1361   return (WALK_CONTINUE);
1362 }
1363
1364
1365 /*
1366  * callback when an interface address is added or deleted
1367  */
1368 static void
1369 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1370                                           uword opaque,
1371                                           u32 sw_if_index,
1372                                           ip4_address_t * address,
1373                                           u32 address_length,
1374                                           u32 if_address_index, u32 is_del)
1375 {
1376   /*
1377    * Flush the ARP cache of all entries covered by the address
1378    * that is being removed.
1379    */
1380   IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
1381                    (is_del ? "del" : "add"),
1382                    format_vnet_sw_if_index_name, vnet_get_main (),
1383                    sw_if_index, format_ip4_address, address, address_length);
1384
1385   if (is_del)
1386     {
1387       ip_neighbor_walk_covered_ctx_t ctx = {
1388         .addr.ip4 = *address,
1389         .type = IP46_TYPE_IP4,
1390         .length = address_length,
1391       };
1392       index_t *ipni;
1393
1394       ip_neighbor_walk (IP46_TYPE_IP4, sw_if_index,
1395                         ip_neighbor_walk_covered, &ctx);
1396
1397       vec_foreach (ipni, ctx.ipnis)
1398         ip_neighbor_free (ip_neighbor_get (*ipni));
1399
1400       vec_free (ctx.ipnis);
1401     }
1402 }
1403
1404 /*
1405  * callback when an interface address is added or deleted
1406  */
1407 static void
1408 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1409                                           uword opaque,
1410                                           u32 sw_if_index,
1411                                           ip6_address_t * address,
1412                                           u32 address_length,
1413                                           u32 if_address_index, u32 is_del)
1414 {
1415   /*
1416    * Flush the ARP cache of all entries covered by the address
1417    * that is being removed.
1418    */
1419   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1420                    format_vnet_sw_if_index_name, vnet_get_main (),
1421                    sw_if_index, format_ip6_address, address, address_length,
1422                    (is_del ? "del" : "add"));
1423
1424   if (is_del)
1425     {
1426       ip_neighbor_walk_covered_ctx_t ctx = {
1427         .addr.ip6 = *address,
1428         .type = IP46_TYPE_IP6,
1429         .length = address_length,
1430       };
1431       index_t *ipni;
1432
1433       ip_neighbor_walk (IP46_TYPE_IP6, sw_if_index,
1434                         ip_neighbor_walk_covered, &ctx);
1435
1436       vec_foreach (ipni, ctx.ipnis)
1437         ip_neighbor_free (ip_neighbor_get (*ipni));
1438
1439       vec_free (ctx.ipnis);
1440     }
1441 }
1442
1443 typedef struct ip_neighbor_table_bind_ctx_t_
1444 {
1445   u32 new_fib_index;
1446   u32 old_fib_index;
1447 } ip_neighbor_table_bind_ctx_t;
1448
1449 static walk_rc_t
1450 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1451 {
1452   ip_neighbor_table_bind_ctx_t *ctx = arg;
1453   ip_neighbor_t *ipn;
1454
1455   ipn = ip_neighbor_get (ipni);
1456   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1457   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1458
1459   return (WALK_CONTINUE);
1460 }
1461
1462 static void
1463 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1464                            uword opaque,
1465                            u32 sw_if_index,
1466                            u32 new_fib_index, u32 old_fib_index)
1467 {
1468   ip_neighbor_table_bind_ctx_t ctx = {
1469     .old_fib_index = old_fib_index,
1470     .new_fib_index = new_fib_index,
1471   };
1472
1473   ip_neighbor_walk (IP46_TYPE_IP4, sw_if_index,
1474                     ip_neighbor_walk_table_bind, &ctx);
1475 }
1476
1477 static void
1478 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1479                            uword opaque,
1480                            u32 sw_if_index,
1481                            u32 new_fib_index, u32 old_fib_index)
1482 {
1483   ip_neighbor_table_bind_ctx_t ctx = {
1484     .old_fib_index = old_fib_index,
1485     .new_fib_index = new_fib_index,
1486   };
1487
1488   ip_neighbor_walk (IP46_TYPE_IP6, sw_if_index,
1489                     ip_neighbor_walk_table_bind, &ctx);
1490 }
1491
1492 typedef enum ip_neighbor_age_state_t_
1493 {
1494   IP_NEIGHBOR_AGE_ALIVE,
1495   IP_NEIGHBOR_AGE_PROBE,
1496   IP_NEIGHBOR_AGE_DEAD,
1497 } ip_neighbor_age_state_t;
1498
1499 #define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1500
1501 static ip_neighbor_age_state_t
1502 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1503 {
1504   ip_neighbor_t *ipn;
1505   u32 ipndb_age;
1506   u32 ttl;
1507
1508   ipn = ip_neighbor_get (ipni);
1509   ipndb_age = ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_age;
1510   ttl = now - ipn->ipn_time_last_updated;
1511   *wait = ipndb_age;
1512
1513   if (ttl > ipndb_age)
1514     {
1515       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1516                        format_ip_neighbor, ipni, now,
1517                        ipn->ipn_time_last_updated, ipndb_age);
1518       if (ipn->ipn_n_probes > 2)
1519         {
1520           /* 3 strikes and yea-re out */
1521           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1522           *wait = 1;
1523           return (IP_NEIGHBOR_AGE_DEAD);
1524         }
1525       else
1526         {
1527           adj_index_t ai;
1528
1529           ai = adj_glean_get (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
1530                               ip_neighbor_get_sw_if_index (ipn));
1531
1532           if (ADJ_INDEX_INVALID != ai)
1533             ip_neighbor_probe_dst (adj_get (ai), ip_neighbor_get_ip (ipn));
1534
1535           ipn->ipn_n_probes++;
1536           *wait = 1;
1537         }
1538     }
1539   else
1540     {
1541       /* here we are sure that ttl <= ipndb_age */
1542       *wait = ipndb_age - ttl + 1;
1543       return (IP_NEIGHBOR_AGE_ALIVE);
1544     }
1545
1546   return (IP_NEIGHBOR_AGE_PROBE);
1547 }
1548
/** Events that can be signalled to the neighbor aging processes */
typedef enum ip_neighbor_process_event_t_
{
  /** wake the process up; signalled by ip_neighbor_config */
  IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
} ip_neighbor_process_event_t;

/**
 * Body of the per-protocol neighbor aging process; never returns in
 * practice (infinite loop).
 *
 * Each iteration waits for an event, or for an event or the timeout when
 * a non-zero timeout is set, and then either ages neighbors from the back
 * of the time-sorted list (timer expiry) or recomputes the timeout
 * (wakeup event).
 *
 * @param vm   vlib main
 * @param rt   node runtime (unused)
 * @param f    frame (unused)
 * @param type IP type whose neighbors this process ages
 */
static uword
ip_neighbor_age_loop (vlib_main_t * vm,
                      vlib_node_runtime_t * rt,
                      vlib_frame_t * f, ip46_type_t type)
{
  uword event_type, *event_data = NULL;
  f64 timeout;

  /* Start with a timeout of 0: the process then blocks until an event
   * arrives, with no timer running */
  timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;

  while (1)
    {
      f64 now;

      /* a zero timeout means wait for an event only */
      if (!timeout)
        vlib_process_wait_for_event (vm);
      else
        vlib_process_wait_for_event_or_clock (vm, timeout);

      /* the event data is not used; only the event type matters */
      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      now = vlib_time_now (vm);

      switch (event_type)
        {
        case ~0:
          {
            /* timer expired */
            ip_neighbor_elt_t *elt, *head;
            f64 wait;

            /* upper bound for the next sleep; lowered by the walk below */
            timeout = ip_neighbor_db[type].ipndb_age;
            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[type]);

          /* *INDENT-OFF*/
          /* the list is time sorted, newest first, so start from the back
           * and work forwards. Stop when we get to one that is alive */
          restart:
          clib_llist_foreach_reverse(ip_neighbor_elt_pool,
                                     ipne_anchor, head, elt,
          ({
            ip_neighbor_age_state_t res;

            res = ip_neighbour_age_out(elt->ipne_index, now, &wait);

            if (IP_NEIGHBOR_AGE_ALIVE == res) {
              /* the oldest neighbor has not yet expired, go back to sleep */
              timeout = clib_min (wait, timeout);
              break;
            }
            else if (IP_NEIGHBOR_AGE_DEAD == res) {
              /* the oldest neighbor is dead, pop it, then restart the walk
               * again from the back */
              ip_neighbor_free (ip_neighbor_get(elt->ipne_index));
              goto restart;
            }

            /* the neighbor is being probed; keep going with the next one */
            timeout = clib_min (wait, timeout);
          }));
          /* *INDENT-ON* */
            break;
          }
        case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
          {

            if (!ip_neighbor_db[type].ipndb_age)
              {
                /* aging has been disabled */
                timeout = 0;
                break;
              }
            ip_neighbor_elt_t *elt, *head;

            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[type]);
            /* no neighbors yet */
            if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
              {
                timeout = ip_neighbor_db[type].ipndb_age;
                break;
              }

            /* poke the oldest neighbour for aging, which returns how long
             * we sleep for */
            elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
            ip_neighbour_age_out (elt->ipne_index, now, &timeout);
            break;
          }
        }
    }
  return 0;
}
1648
1649 static uword
1650 ip4_neighbor_age_process (vlib_main_t * vm,
1651                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1652 {
1653   return (ip_neighbor_age_loop (vm, rt, f, IP46_TYPE_IP4));
1654 }
1655
1656 static uword
1657 ip6_neighbor_age_process (vlib_main_t * vm,
1658                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1659 {
1660   return (ip_neighbor_age_loop (vm, rt, f, IP46_TYPE_IP6));
1661 }
1662
1663 /* *INDENT-OFF* */
1664 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1665   .function = ip4_neighbor_age_process,
1666   .type = VLIB_NODE_TYPE_PROCESS,
1667   .name = "ip4-neighbor-age-process",
1668 };
1669 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1670   .function = ip6_neighbor_age_process,
1671   .type = VLIB_NODE_TYPE_PROCESS,
1672   .name = "ip6-neighbor-age-process",
1673 };
1674 /* *INDENT-ON* */
1675
1676 int
1677 ip_neighbor_config (ip46_type_t type, u32 limit, u32 age, bool recycle)
1678 {
1679   ip_neighbor_db[type].ipndb_limit = limit;
1680   ip_neighbor_db[type].ipndb_recycle = recycle;
1681   ip_neighbor_db[type].ipndb_age = age;
1682
1683   vlib_process_signal_event (vlib_get_main (),
1684                              (IP46_TYPE_IP4 == type ?
1685                               ip4_neighbor_age_process_node.index :
1686                               ip6_neighbor_age_process_node.index),
1687                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1688
1689   return (0);
1690 }
1691
1692 static clib_error_t *
1693 ip_neighbor_config_show (vlib_main_t * vm,
1694                          unformat_input_t * input, vlib_cli_command_t * cmd)
1695 {
1696   ip46_type_t type;
1697
1698   /* *INDENT-OFF* */
1699   FOREACH_IP46_TYPE(type) {
1700     vlib_cli_output (vm, "%U:", format_ip46_type, type);
1701     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1702                      ip_neighbor_db[type].ipndb_limit,
1703                      ip_neighbor_db[type].ipndb_age,
1704                      ip_neighbor_db[type].ipndb_recycle);
1705   }
1706
1707   /* *INDENT-ON* */
1708   return (NULL);
1709 }
1710
1711 /* *INDENT-OFF* */
1712 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1713   .path = "show ip neighbor-config",
1714   .function = ip_neighbor_config_show,
1715   .short_help = "show ip neighbor-config",
1716 };
1717 /* *INDENT-ON* */
1718
1719 static clib_error_t *
1720 ip_neighbor_init (vlib_main_t * vm)
1721 {
1722   {
1723     ip4_add_del_interface_address_callback_t cb = {
1724       .function = ip_neighbor_add_del_interface_address_v4,
1725     };
1726     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1727   }
1728   {
1729     ip6_add_del_interface_address_callback_t cb = {
1730       .function = ip_neighbor_add_del_interface_address_v6,
1731     };
1732     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1733   }
1734   {
1735     ip4_table_bind_callback_t cb = {
1736       .function = ip_neighbor_table_bind_v4,
1737     };
1738     vec_add1 (ip4_main.table_bind_callbacks, cb);
1739   }
1740   {
1741     ip6_table_bind_callback_t cb = {
1742       .function = ip_neighbor_table_bind_v6,
1743     };
1744     vec_add1 (ip6_main.table_bind_callbacks, cb);
1745   }
1746   {
1747     ethernet_address_change_ctx_t ctx = {
1748       .function = ip_neighbor_ethernet_change_mac,
1749       .function_opaque = 0,
1750     };
1751     vec_add1 (ethernet_main.address_change_callbacks, ctx);
1752   }
1753
1754   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1755
1756   ip46_type_t type;
1757
1758   FOREACH_IP46_TYPE (type)
1759     ip_neighbor_list_head[type] =
1760     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1761
1762   return (NULL);
1763 }
1764
1765 /* *INDENT-OFF* */
1766 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1767 {
1768   .runs_after = VLIB_INITS("ip_main_init"),
1769 };
1770 /* *INDENT-ON* */
1771
1772 /*
1773  * fd.io coding-style-patch-verification: ON
1774  *
1775  * Local Variables:
1776  * eval: (c-set-style "gnu")
1777  * End:
1778  */