fib: Table Replace
[vpp.git] / src / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip_neighbor.h>
20 #include <vnet/ip/ip6.h>
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/ethernet/arp.h>
23 #include <vnet/l2/l2_input.h>
24 #include <vppinfra/mhash.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/fib/fib_entry_src.h>
27 #include <vnet/adj/adj_nbr.h>
28 #include <vnet/adj/adj_mcast.h>
29 #include <vnet/mpls/mpls.h>
30 #include <vnet/l2/feat_bitmap.h>
31
32 #include <vlibmemory/api.h>
33
34 /**
35  * @file
36  * @brief IPv4 ARP.
37  *
38  * This file contains code to manage the IPv4 ARP tables (IP Address
39  * to MAC Address lookup).
40  */
41
42
43 /**
44  * @brief Per-interface ARP configuration and state
45  */
46 typedef struct ethernet_arp_interface_t_
47 {
48   /**
49    * Hash table of ARP entries.
50    * Since this hash table is per-interface, the key is only the IPv4 address.
51    */
52   uword *arp_entries;
53   /**
54    * Is ARP enabled on this interface
55    */
56   u32 enabled;
57   /**
58    * Is Proxy ARP enabled on this interface
59    */
60   u32 proxy_enabled;
61 } ethernet_arp_interface_t;
62
63 typedef struct
64 {
65   ip4_address_t lo_addr;
66   ip4_address_t hi_addr;
67   u32 fib_index;
68 } ethernet_proxy_arp_t;
69
70 typedef struct
71 {
72   u32 next_index;
73   uword node_index;
74   uword type_opaque;
75   uword data;
76   /* Used for arp event notification only */
77   arp_change_event_cb_t data_callback;
78   u32 pid;
79 } pending_resolution_t;
80
81 typedef struct
82 {
83   /* Hash tables mapping name to opcode. */
84   uword *opcode_by_name;
85
86   /* lite beer "glean" adjacency handling */
87   uword *pending_resolutions_by_address;
88   pending_resolution_t *pending_resolutions;
89
90   /* Mac address change notification */
91   uword *mac_changes_by_address;
92   pending_resolution_t *mac_changes;
93
94   ethernet_arp_ip4_entry_t *ip4_entry_pool;
95
96   /* ARP attack mitigation */
97   u32 arp_delete_rotor;
98   u32 limit_arp_cache_size;
99
100   /** Per interface state */
101   ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
102
103   /* Proxy arp vector */
104   ethernet_proxy_arp_t *proxy_arps;
105
106   uword wc_ip4_arp_publisher_node;
107   uword wc_ip4_arp_publisher_et;
108
109   /* ARP feature arc index */
110   u8 feature_arc_index;
111 } ethernet_arp_main_t;
112
113 static ethernet_arp_main_t ethernet_arp_main;
114
115 typedef struct
116 {
117   u32 sw_if_index;
118   ip4_address_t ip4;
119   mac_address_t mac;
120   ip_neighbor_flags_t nbr_flags;
121   u32 flags;
122 #define ETHERNET_ARP_ARGS_REMOVE (1<<0)
123 #define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
124 #define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
125 #define ETHERNET_ARP_ARGS_WC_PUB  (1<<3)
126 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
127
128 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
129
130 /* Node index for send_garp_na_process */
131 u32 send_garp_na_process_node_index;
132
133 static void
134 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
135                                     * a);
136
137 static u8 *
138 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
139 {
140   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
141   char *t = 0;
142   switch (h)
143     {
144 #define _(n,f) case n: t = #f; break;
145       foreach_ethernet_arp_hardware_type;
146 #undef _
147
148     default:
149       return format (s, "unknown 0x%x", h);
150     }
151
152   return format (s, "%s", t);
153 }
154
155 static u8 *
156 format_ethernet_arp_opcode (u8 * s, va_list * va)
157 {
158   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
159   char *t = 0;
160   switch (o)
161     {
162 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
163       foreach_ethernet_arp_opcode;
164 #undef _
165
166     default:
167       return format (s, "unknown 0x%x", o);
168     }
169
170   return format (s, "%s", t);
171 }
172
173 static uword
174 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
175                                               va_list * args)
176 {
177   int *result = va_arg (*args, int *);
178   ethernet_arp_main_t *am = &ethernet_arp_main;
179   int x, i;
180
181   /* Numeric opcode. */
182   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
183     {
184       if (x >= (1 << 16))
185         return 0;
186       *result = x;
187       return 1;
188     }
189
190   /* Named type. */
191   if (unformat_user (input, unformat_vlib_number_by_name,
192                      am->opcode_by_name, &i))
193     {
194       *result = i;
195       return 1;
196     }
197
198   return 0;
199 }
200
201 static uword
202 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
203                                              va_list * args)
204 {
205   int *result = va_arg (*args, int *);
206   if (!unformat_user
207       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
208     return 0;
209
210   *result = clib_host_to_net_u16 ((u16) * result);
211   return 1;
212 }
213
214 static u8 *
215 format_ethernet_arp_header (u8 * s, va_list * va)
216 {
217   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
218   u32 max_header_bytes = va_arg (*va, u32);
219   u32 indent;
220   u16 l2_type, l3_type;
221
222   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
223     return format (s, "ARP header truncated");
224
225   l2_type = clib_net_to_host_u16 (a->l2_type);
226   l3_type = clib_net_to_host_u16 (a->l3_type);
227
228   indent = format_get_indent (s);
229
230   s = format (s, "%U, type %U/%U, address size %d/%d",
231               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
232               format_ethernet_arp_hardware_type, l2_type,
233               format_ethernet_type, l3_type,
234               a->n_l2_address_bytes, a->n_l3_address_bytes);
235
236   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
237       && l3_type == ETHERNET_TYPE_IP4)
238     {
239       s = format (s, "\n%U%U/%U -> %U/%U",
240                   format_white_space, indent,
241                   format_mac_address_t, &a->ip4_over_ethernet[0].mac,
242                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
243                   format_mac_address_t, &a->ip4_over_ethernet[1].mac,
244                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
245     }
246   else
247     {
248       uword n2 = a->n_l2_address_bytes;
249       uword n3 = a->n_l3_address_bytes;
250       s = format (s, "\n%U%U/%U -> %U/%U",
251                   format_white_space, indent,
252                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
253                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
254                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
255                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
256     }
257
258   return s;
259 }
260
261 u8 *
262 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
263 {
264   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
265   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
266   vnet_sw_interface_t *si;
267
268   if (!e)
269     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
270                    "Flags", "Ethernet", "Interface");
271
272   si = vnet_get_sw_interface (vnm, e->sw_if_index);
273
274   return format (s, "%=12U%=16U%=6U%=20U%U",
275                  format_vlib_time, vnm->vlib_main, e->time_last_updated,
276                  format_ip4_address, &e->ip4_address,
277                  format_ip_neighbor_flags, e->flags,
278                  format_mac_address_t, &e->mac,
279                  format_vnet_sw_interface_name, vnm, si);
280 }
281
282 typedef struct
283 {
284   u8 packet_data[64];
285 } ethernet_arp_input_trace_t;
286
287 static u8 *
288 format_ethernet_arp_input_trace (u8 * s, va_list * va)
289 {
290   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
291   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
292   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
293
294   s = format (s, "%U",
295               format_ethernet_arp_header,
296               t->packet_data, sizeof (t->packet_data));
297
298   return s;
299 }
300
301 static u8 *
302 format_arp_term_input_trace (u8 * s, va_list * va)
303 {
304   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
305   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
306   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
307
308   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
309      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
310      - for ip6, the first nibble has value of 6. */
311   s = format (s, "%U", t->packet_data[0] == 0 ?
312               format_ethernet_arp_header : format_ip6_header,
313               t->packet_data, sizeof (t->packet_data));
314
315   return s;
316 }
317
318 static void
319 arp_nbr_probe (ip_adjacency_t * adj)
320 {
321   vnet_main_t *vnm = vnet_get_main ();
322   ip4_main_t *im = &ip4_main;
323   ip_interface_address_t *ia;
324   ethernet_arp_header_t *h;
325   vnet_hw_interface_t *hi;
326   vnet_sw_interface_t *si;
327   ip4_address_t *src;
328   vlib_buffer_t *b;
329   vlib_main_t *vm;
330   u32 bi = 0;
331
332   vm = vlib_get_main ();
333
334   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
335
336   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
337     {
338       return;
339     }
340
341   src =
342     ip4_interface_address_matching_destination (im,
343                                                 &adj->sub_type.nbr.next_hop.
344                                                 ip4,
345                                                 adj->rewrite_header.
346                                                 sw_if_index, &ia);
347   if (!src)
348     {
349       return;
350     }
351
352   h =
353     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
354                                      &bi);
355   if (!h)
356     return;
357
358   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
359
360   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
361
362   h->ip4_over_ethernet[0].ip4 = src[0];
363   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
364
365   b = vlib_get_buffer (vm, bi);
366   vnet_buffer (b)->sw_if_index[VLIB_RX] =
367     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
368
369   /* Add encapsulation string for software interface (e.g. ethernet header). */
370   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
371   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
372
373   {
374     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
375     u32 *to_next = vlib_frame_vector_args (f);
376     to_next[0] = bi;
377     f->n_vectors = 1;
378     vlib_put_frame_to_node (vm, hi->output_node_index, f);
379   }
380 }
381
382 static void
383 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
384 {
385   adj_nbr_update_rewrite
386     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
387      ethernet_build_rewrite (vnet_get_main (),
388                              e->sw_if_index,
389                              adj_get_link_type (ai), &e->mac));
390 }
391
392 static void
393 arp_mk_incomplete (adj_index_t ai)
394 {
395   ip_adjacency_t *adj = adj_get (ai);
396
397   adj_nbr_update_rewrite
398     (ai,
399      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
400      ethernet_build_rewrite (vnet_get_main (),
401                              adj->rewrite_header.sw_if_index,
402                              VNET_LINK_ARP,
403                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
404 }
405
406 static ethernet_arp_ip4_entry_t *
407 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
408 {
409   ethernet_arp_main_t *am = &ethernet_arp_main;
410   ethernet_arp_ip4_entry_t *e = NULL;
411   uword *p;
412
413   if (NULL != eai->arp_entries)
414     {
415       p = hash_get (eai->arp_entries, addr->as_u32);
416       if (!p)
417         return (NULL);
418
419       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
420     }
421
422   return (e);
423 }
424
425 static adj_walk_rc_t
426 arp_mk_complete_walk (adj_index_t ai, void *ctx)
427 {
428   ethernet_arp_ip4_entry_t *e = ctx;
429
430   arp_mk_complete (ai, e);
431
432   return (ADJ_WALK_RC_CONTINUE);
433 }
434
435 static adj_walk_rc_t
436 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
437 {
438   arp_mk_incomplete (ai);
439
440   return (ADJ_WALK_RC_CONTINUE);
441 }
442
443 static int
444 arp_is_enabled (ethernet_arp_main_t * am, u32 sw_if_index)
445 {
446   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
447     return 0;
448
449   return (am->ethernet_arp_by_sw_if_index[sw_if_index].enabled);
450 }
451
452 static void
453 arp_enable (ethernet_arp_main_t * am, u32 sw_if_index)
454 {
455   if (arp_is_enabled (am, sw_if_index))
456     return;
457
458   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
459
460   am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 1;
461
462   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 1, NULL, 0);
463   vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 0, NULL,
464                                0);
465 }
466
467 static int
468 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
469                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
470                                            * args);
471
472 static void
473 arp_disable (ethernet_arp_main_t * am, u32 sw_if_index)
474 {
475   ethernet_arp_interface_t *eai;
476   ethernet_arp_ip4_entry_t *e;
477   u32 i, *to_delete = 0;
478   hash_pair_t *pair;
479
480   if (!arp_is_enabled (am, sw_if_index))
481     return;
482
483   vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 1, NULL,
484                                0);
485   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 0, NULL, 0);
486
487   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
488
489
490   /* *INDENT-OFF* */
491   hash_foreach_pair (pair, eai->arp_entries,
492   ({
493     e = pool_elt_at_index(am->ip4_entry_pool,
494                           pair->value[0]);
495     vec_add1 (to_delete, e - am->ip4_entry_pool);
496   }));
497   /* *INDENT-ON* */
498
499   for (i = 0; i < vec_len (to_delete); i++)
500     {
501       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
502
503       vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
504         .ip4.as_u32 = e->ip4_address.as_u32,
505         .sw_if_index = e->sw_if_index,
506         .flags = ETHERNET_ARP_ARGS_FLUSH,
507       };
508       mac_address_copy (&delme.mac, &e->mac);
509
510       vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (), &delme);
511     }
512
513   vec_free (to_delete);
514
515   eai->enabled = 0;
516 }
517
518 void
519 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
520 {
521   ethernet_arp_main_t *am = &ethernet_arp_main;
522   ethernet_arp_interface_t *arp_int;
523   ethernet_arp_ip4_entry_t *e;
524   ip_adjacency_t *adj;
525
526   adj = adj_get (ai);
527
528   arp_enable (am, sw_if_index);
529   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
530   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
531
532   switch (adj->lookup_next_index)
533     {
534     case IP_LOOKUP_NEXT_GLEAN:
535       adj_glean_update_rewrite (ai);
536       break;
537     case IP_LOOKUP_NEXT_ARP:
538       if (NULL != e)
539         {
540           adj_nbr_walk_nh4 (sw_if_index,
541                             &e->ip4_address, arp_mk_complete_walk, e);
542         }
543       else
544         {
545           /*
546            * no matching ARP entry.
547            * construct the rewrite required to for an ARP packet, and stick
548            * that in the adj's pipe to smoke.
549            */
550           adj_nbr_update_rewrite
551             (ai,
552              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
553              ethernet_build_rewrite
554              (vnm,
555               sw_if_index,
556               VNET_LINK_ARP,
557               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
558
559           /*
560            * since the FIB has added this adj for a route, it makes sense it
561            * may want to forward traffic sometime soon. Let's send a
562            * speculative ARP. just one. If we were to do periodically that
563            * wouldn't be bad either, but that's more code than i'm prepared to
564            * write at this time for relatively little reward.
565            */
566           arp_nbr_probe (adj);
567         }
568       break;
569     case IP_LOOKUP_NEXT_BCAST:
570       adj_nbr_update_rewrite (ai,
571                               ADJ_NBR_REWRITE_FLAG_COMPLETE,
572                               ethernet_build_rewrite
573                               (vnm,
574                                sw_if_index,
575                                VNET_LINK_IP4,
576                                VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
577       break;
578     case IP_LOOKUP_NEXT_MCAST:
579       {
580         /*
581          * Construct a partial rewrite from the known ethernet mcast dest MAC
582          */
583         u8 *rewrite;
584         u8 offset;
585
586         rewrite = ethernet_build_rewrite (vnm,
587                                           sw_if_index,
588                                           adj->ia_link,
589                                           ethernet_ip4_mcast_dst_addr ());
590         offset = vec_len (rewrite) - 2;
591
592         /*
593          * Complete the remaining fields of the adj's rewrite to direct the
594          * complete of the rewrite at switch time by copying in the IP
595          * dst address's bytes.
596          * Offset is 2 bytes into the MAC destination address.
597          */
598         adj_mcast_update_rewrite (ai, rewrite, offset);
599
600         break;
601       }
602     case IP_LOOKUP_NEXT_DROP:
603     case IP_LOOKUP_NEXT_PUNT:
604     case IP_LOOKUP_NEXT_LOCAL:
605     case IP_LOOKUP_NEXT_REWRITE:
606     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
607     case IP_LOOKUP_NEXT_MIDCHAIN:
608     case IP_LOOKUP_NEXT_ICMP_ERROR:
609     case IP_LOOKUP_N_NEXT:
610       ASSERT (0);
611       break;
612     }
613 }
614
615 static void
616 arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
617 {
618   fib_prefix_t pfx = {
619     .fp_len = 32,
620     .fp_proto = FIB_PROTOCOL_IP4,
621     .fp_addr.ip4 = e->ip4_address,
622   };
623
624   e->fib_entry_index =
625     fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
626                               FIB_ENTRY_FLAG_ATTACHED,
627                               DPO_PROTO_IP4, &pfx.fp_addr,
628                               e->sw_if_index, ~0, 1, NULL,
629                               FIB_ROUTE_PATH_FLAG_NONE);
630   fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
631 }
632
633 static void
634 arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
635 {
636   if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
637     {
638       fib_prefix_t pfx = {
639         .fp_len = 32,
640         .fp_proto = FIB_PROTOCOL_IP4,
641         .fp_addr.ip4 = e->ip4_address,
642       };
643       u32 fib_index;
644
645       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
646
647       fib_table_entry_path_remove (fib_index, &pfx,
648                                    FIB_SOURCE_ADJ,
649                                    DPO_PROTO_IP4,
650                                    &pfx.fp_addr,
651                                    e->sw_if_index, ~0, 1,
652                                    FIB_ROUTE_PATH_FLAG_NONE);
653       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
654     }
655 }
656
657 static ethernet_arp_ip4_entry_t *
658 force_reuse_arp_entry (void)
659 {
660   ethernet_arp_ip4_entry_t *e;
661   ethernet_arp_main_t *am = &ethernet_arp_main;
662   u32 count = 0;
663   u32 index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
664   if (index == ~0)              /* Try again from elt 0 */
665     index = pool_next_index (am->ip4_entry_pool, index);
666
667   /* Find a non-static random entry to free up for reuse */
668   do
669     {
670       if ((count++ == 100) || (index == ~0))
671         return NULL;            /* give up after 100 entries */
672       e = pool_elt_at_index (am->ip4_entry_pool, index);
673       am->arp_delete_rotor = index;
674       index = pool_next_index (am->ip4_entry_pool, index);
675     }
676   while (e->flags & IP_NEIGHBOR_FLAG_STATIC);
677
678   /* Remove ARP entry from its interface and update fib */
679   hash_unset
680     (am->ethernet_arp_by_sw_if_index[e->sw_if_index].arp_entries,
681      e->ip4_address.as_u32);
682   arp_adj_fib_remove
683     (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
684   adj_nbr_walk_nh4 (e->sw_if_index,
685                     &e->ip4_address, arp_mk_incomplete_walk, e);
686   return e;
687 }
688
689 static int
690 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
691                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
692                                          * args)
693 {
694   ethernet_arp_ip4_entry_t *e = 0;
695   ethernet_arp_main_t *am = &ethernet_arp_main;
696   vlib_main_t *vm = vlib_get_main ();
697   int make_new_arp_cache_entry = 1;
698   uword *p;
699   pending_resolution_t *pr, *mc;
700   ethernet_arp_interface_t *arp_int;
701   u32 sw_if_index = args->sw_if_index;
702
703   arp_enable (am, sw_if_index);
704
705   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
706
707   if (NULL != arp_int->arp_entries)
708     {
709       p = hash_get (arp_int->arp_entries, args->ip4.as_u32);
710       if (p)
711         {
712           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
713
714           /* Refuse to over-write static arp. */
715           if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC) &&
716               (e->flags & IP_NEIGHBOR_FLAG_STATIC))
717             {
718               /* if MAC address match, still check to send event */
719               if (mac_address_equal (&e->mac, &args->mac))
720                 goto check_customers;
721               return -2;
722             }
723           make_new_arp_cache_entry = 0;
724         }
725     }
726
727   if (make_new_arp_cache_entry)
728     {
729       if (am->limit_arp_cache_size &&
730           pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
731         {
732           e = force_reuse_arp_entry ();
733           if (NULL == e)
734             return -2;
735         }
736       else
737         pool_get (am->ip4_entry_pool, e);
738
739       if (NULL == arp_int->arp_entries)
740         arp_int->arp_entries = hash_create (0, sizeof (u32));
741
742       hash_set (arp_int->arp_entries, args->ip4.as_u32,
743                 e - am->ip4_entry_pool);
744
745       e->sw_if_index = sw_if_index;
746       e->ip4_address = args->ip4;
747       e->fib_entry_index = FIB_NODE_INDEX_INVALID;
748       mac_address_copy (&e->mac, &args->mac);
749
750       if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
751         {
752           arp_adj_fib_add (e,
753                            ip4_fib_table_get_index_for_sw_if_index
754                            (e->sw_if_index));
755         }
756       else
757         {
758           e->flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
759         }
760     }
761   else
762     {
763       /*
764        * prevent a DoS attack from the data-plane that
765        * spams us with no-op updates to the MAC address
766        */
767       if (mac_address_equal (&e->mac, &args->mac))
768         {
769           e->time_last_updated = vlib_time_now (vm);
770           goto check_customers;
771         }
772
773       /* Update ethernet address. */
774       mac_address_copy (&e->mac, &args->mac);
775     }
776
777   /* Update time stamp and flags. */
778   e->time_last_updated = vlib_time_now (vm);
779   if (args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC)
780     {
781       e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
782       e->flags |= IP_NEIGHBOR_FLAG_STATIC;
783     }
784   else
785     {
786       e->flags &= ~IP_NEIGHBOR_FLAG_STATIC;
787       e->flags |= IP_NEIGHBOR_FLAG_DYNAMIC;
788     }
789
790   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
791
792 check_customers:
793   /* Customer(s) waiting for this address to be resolved? */
794   p = hash_get (am->pending_resolutions_by_address, args->ip4.as_u32);
795   if (p)
796     {
797       u32 next_index;
798       next_index = p[0];
799
800       while (next_index != (u32) ~ 0)
801         {
802           pr = pool_elt_at_index (am->pending_resolutions, next_index);
803           vlib_process_signal_event (vm, pr->node_index,
804                                      pr->type_opaque, pr->data);
805           next_index = pr->next_index;
806           pool_put (am->pending_resolutions, pr);
807         }
808
809       hash_unset (am->pending_resolutions_by_address, args->ip4.as_u32);
810     }
811
812   /* Customer(s) requesting ARP event for this address? */
813   p = hash_get (am->mac_changes_by_address, args->ip4.as_u32);
814   if (p)
815     {
816       u32 next_index;
817       next_index = p[0];
818
819       while (next_index != (u32) ~ 0)
820         {
821           int rv = 1;
822           mc = pool_elt_at_index (am->mac_changes, next_index);
823
824           /* Call the user's data callback, return 1 to suppress dup events */
825           if (mc->data_callback)
826             rv = (mc->data_callback) (mc->data, &args->mac, sw_if_index, 0);
827
828           /*
829            * Signal the resolver process, as long as the user
830            * says they want to be notified
831            */
832           if (rv == 0)
833             vlib_process_signal_event (vm, mc->node_index,
834                                        mc->type_opaque, mc->data);
835           next_index = mc->next_index;
836         }
837     }
838
839   return 0;
840 }
841
842 void
843 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
844                                         void *address_arg,
845                                         uword node_index,
846                                         uword type_opaque, uword data)
847 {
848   ethernet_arp_main_t *am = &ethernet_arp_main;
849   ip4_address_t *address = address_arg;
850   uword *p;
851   pending_resolution_t *pr;
852
853   pool_get (am->pending_resolutions, pr);
854
855   pr->next_index = ~0;
856   pr->node_index = node_index;
857   pr->type_opaque = type_opaque;
858   pr->data = data;
859   pr->data_callback = 0;
860
861   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
862   if (p)
863     {
864       /* Insert new resolution at the head of the list */
865       pr->next_index = p[0];
866       hash_unset (am->pending_resolutions_by_address, address->as_u32);
867     }
868
869   hash_set (am->pending_resolutions_by_address, address->as_u32,
870             pr - am->pending_resolutions);
871 }
872
873 int
874 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
875                                    arp_change_event_cb_t data_callback,
876                                    u32 pid,
877                                    void *address_arg,
878                                    uword node_index,
879                                    uword type_opaque, uword data, int is_add)
880 {
881   ethernet_arp_main_t *am = &ethernet_arp_main;
882   ip4_address_t *address = address_arg;
883
884   /* Try to find an existing entry */
885   u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32);
886   u32 *p = first;
887   pending_resolution_t *mc;
888   while (p && *p != ~0)
889     {
890       mc = pool_elt_at_index (am->mac_changes, *p);
891       if (mc->node_index == node_index && mc->type_opaque == type_opaque
892           && mc->pid == pid)
893         break;
894       p = &mc->next_index;
895     }
896
897   int found = p && *p != ~0;
898   if (is_add)
899     {
900       if (found)
901         return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
902
903       pool_get (am->mac_changes, mc);
904       /* *INDENT-OFF* */
905       *mc = (pending_resolution_t)
906       {
907         .next_index = ~0,
908         .node_index = node_index,
909         .type_opaque = type_opaque,
910         .data = data,
911         .data_callback = data_callback,
912         .pid = pid,
913       };
914       /* *INDENT-ON* */
915
916       /* Insert new resolution at the end of the list */
917       u32 new_idx = mc - am->mac_changes;
918       if (p)
919         p[0] = new_idx;
920       else
921         hash_set (am->mac_changes_by_address, address->as_u32, new_idx);
922     }
923   else
924     {
925       if (!found)
926         return VNET_API_ERROR_NO_SUCH_ENTRY;
927
928       /* Clients may need to clean up pool entries, too */
929       if (data_callback)
930         /* no new mac addrs */
931         (data_callback) (mc->data, NULL, ~0, NULL);
932
933       /* Remove the entry from the list and delete the entry */
934       *p = mc->next_index;
935       pool_put (am->mac_changes, mc);
936
937       /* Remove from hash if we deleted the last entry */
938       if (*p == ~0 && p == first)
939         hash_unset (am->mac_changes_by_address, address->as_u32);
940     }
941   return 0;
942 }
943
/**
 * Next-node choices for ARP reply handling: the packet is either
 * dropped or a reply is sent back to the sender.
 */
typedef enum
{
  ARP_REPLY_NEXT_DROP,
  ARP_REPLY_NEXT_REPLY_TX,
  ARP_REPLY_N_NEXT,             /* number of next nodes */
} arp_reply_next_t;
951
/*
 * ARP error/counter list: one _ (symbol, description) pair per line.
 * It is expanded with different definitions of _ to generate the
 * ETHERNET_ARP_ERROR_* enumeration.
 */
#define foreach_ethernet_arp_error \
  _ (replies_sent, "ARP replies sent") \
  _ (l2_type_not_ethernet, "L2 type not ethernet") \
  _ (l3_type_not_ip4, "L3 type not IP4") \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_dst_address_unset, "IP4 destination address is unset") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned") \
  _ (replies_received, "ARP replies received") \
  _ (opcode_not_request, "ARP opcode not request") \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table") \
  _ (interface_not_ip_enabled, "Interface is not IP enabled") \
  _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet")
969
970 typedef enum
971 {
972 #define _(sym,string) ETHERNET_ARP_ERROR_##sym,
973   foreach_ethernet_arp_error
974 #undef _
975     ETHERNET_ARP_N_ERROR,
976 } ethernet_arp_reply_error_t;
977
978 static int
979 arp_unnumbered (vlib_buffer_t * p0,
980                 u32 input_sw_if_index, u32 conn_sw_if_index)
981 {
982   vnet_main_t *vnm = vnet_get_main ();
983   vnet_interface_main_t *vim = &vnm->interface_main;
984   vnet_sw_interface_t *si;
985
986   /* verify that the input interface is unnumbered to the connected.
987    * the connected interface is the interface on which the subnet is
988    * configured */
989   si = &vim->sw_interfaces[input_sw_if_index];
990
991   if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
992         (si->unnumbered_sw_if_index == conn_sw_if_index)))
993     {
994       /* the input interface is not unnumbered to the interface on which
995        * the sub-net is configured that covers the ARP request.
996        * So this is not the case for unnumbered.. */
997       return 0;
998     }
999
1000   return !0;
1001 }
1002
1003 static u32
1004 arp_learn (vnet_main_t * vnm,
1005            ethernet_arp_main_t * am, u32 sw_if_index,
1006            const ethernet_arp_ip4_over_ethernet_address_t * addr)
1007 {
1008   vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0);
1009   return (ETHERNET_ARP_ERROR_l3_src_address_learned);
1010 }
1011
/** Next-node choices of the "arp-input" node. */
typedef enum arp_input_next_t_
{
  ARP_INPUT_NEXT_DROP = 0,
  ARP_INPUT_NEXT_DISABLED = 1,
  ARP_INPUT_N_NEXT = 2,
} arp_input_next_t;
1018
1019 static uword
1020 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1021 {
1022   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
1023   ethernet_arp_main_t *am = &ethernet_arp_main;
1024
1025   from = vlib_frame_vector_args (frame);
1026   n_left_from = frame->n_vectors;
1027   next_index = node->cached_next_index;
1028
1029   if (node->flags & VLIB_NODE_FLAG_TRACE)
1030     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1031                                    /* stride */ 1,
1032                                    sizeof (ethernet_arp_input_trace_t));
1033
1034   while (n_left_from > 0)
1035     {
1036       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1037
1038       while (n_left_from > 0 && n_left_to_next > 0)
1039         {
1040           const ethernet_arp_header_t *arp0;
1041           arp_input_next_t next0;
1042           vlib_buffer_t *p0;
1043           u32 pi0, error0;
1044
1045           pi0 = to_next[0] = from[0];
1046           from += 1;
1047           to_next += 1;
1048           n_left_from -= 1;
1049           n_left_to_next -= 1;
1050
1051           p0 = vlib_get_buffer (vm, pi0);
1052           arp0 = vlib_buffer_get_current (p0);
1053
1054           error0 = ETHERNET_ARP_ERROR_replies_sent;
1055           next0 = ARP_INPUT_NEXT_DROP;
1056
1057           error0 =
1058             (arp0->l2_type !=
1059              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1060              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1061           error0 =
1062             (arp0->l3_type !=
1063              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1064              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1065           error0 =
1066             (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
1067              ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
1068
1069           if (ETHERNET_ARP_ERROR_replies_sent == error0)
1070             {
1071               next0 = ARP_INPUT_NEXT_DISABLED;
1072               vnet_feature_arc_start (am->feature_arc_index,
1073                                       vnet_buffer (p0)->sw_if_index[VLIB_RX],
1074                                       &next0, p0);
1075             }
1076           else
1077             p0->error = node->errors[error0];
1078
1079           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1080                                            n_left_to_next, pi0, next0);
1081         }
1082
1083       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1084     }
1085
1086   return frame->n_vectors;
1087 }
1088
/** Next-node choices of the "arp-disabled" node: drop is the only one. */
typedef enum arp_disabled_next_t_
{
  ARP_DISABLED_NEXT_DROP = 0,
  ARP_DISABLED_N_NEXT = 1,
} arp_disabled_next_t;
1094
/**
 * X-macro list of the "arp-disabled" node's error codes; each entry is
 * _(symbol, description).
 */
#define foreach_arp_disabled_error                      \
  _ (DISABLED, "ARP Disabled on this interface")

1098 typedef enum
1099 {
1100 #define _(sym,string) ARP_DISABLED_ERROR_##sym,
1101   foreach_arp_disabled_error
1102 #undef _
1103     ARP_DISABLED_N_ERROR,
1104 } arp_disabled_error_t;
1105
1106 static char *arp_disabled_error_strings[] = {
1107 #define _(sym,string) string,
1108   foreach_arp_disabled_error
1109 #undef _
1110 };
1111
1112 static uword
1113 arp_disabled (vlib_main_t * vm,
1114               vlib_node_runtime_t * node, vlib_frame_t * frame)
1115 {
1116   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
1117
1118   from = vlib_frame_vector_args (frame);
1119   n_left_from = frame->n_vectors;
1120   next_index = node->cached_next_index;
1121
1122   if (node->flags & VLIB_NODE_FLAG_TRACE)
1123     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1124                                    /* stride */ 1,
1125                                    sizeof (ethernet_arp_input_trace_t));
1126
1127   while (n_left_from > 0)
1128     {
1129       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1130
1131       while (n_left_from > 0 && n_left_to_next > 0)
1132         {
1133           arp_disabled_next_t next0 = ARP_DISABLED_NEXT_DROP;
1134           vlib_buffer_t *p0;
1135           u32 pi0, error0;
1136
1137           next0 = ARP_DISABLED_NEXT_DROP;
1138           error0 = ARP_DISABLED_ERROR_DISABLED;
1139
1140           pi0 = to_next[0] = from[0];
1141           from += 1;
1142           to_next += 1;
1143           n_left_from -= 1;
1144           n_left_to_next -= 1;
1145
1146           p0 = vlib_get_buffer (vm, pi0);
1147           p0->error = node->errors[error0];
1148
1149           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1150                                            n_left_to_next, pi0, next0);
1151         }
1152
1153       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1154     }
1155
1156   return frame->n_vectors;
1157 }
1158
1159 static_always_inline u32
1160 arp_mk_reply (vnet_main_t * vnm,
1161               vlib_buffer_t * p0,
1162               u32 sw_if_index0,
1163               const ip4_address_t * if_addr0,
1164               ethernet_arp_header_t * arp0, ethernet_header_t * eth_rx)
1165 {
1166   vnet_hw_interface_t *hw_if0;
1167   u8 *rewrite0, rewrite0_len;
1168   ethernet_header_t *eth_tx;
1169   u32 next0;
1170
1171   /* Send a reply.
1172      An adjacency to the sender is not always present,
1173      so we use the interface to build us a rewrite string
1174      which will contain all the necessary tags. */
1175   rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
1176                                      VNET_LINK_ARP, eth_rx->src_address);
1177   rewrite0_len = vec_len (rewrite0);
1178
1179   /* Figure out how much to rewind current data from adjacency. */
1180   vlib_buffer_advance (p0, -rewrite0_len);
1181   eth_tx = vlib_buffer_get_current (p0);
1182
1183   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1184   hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1185
1186   /* Send reply back through input interface */
1187   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1188   next0 = ARP_REPLY_NEXT_REPLY_TX;
1189
1190   arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1191
1192   arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1193
1194   mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac,
1195                           hw_if0->hw_address);
1196   clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1197     if_addr0->data_u32;
1198
1199   /* Hardware must be ethernet-like. */
1200   ASSERT (vec_len (hw_if0->hw_address) == 6);
1201
1202   /* the rx nd tx ethernet headers wil overlap in the case
1203    * when we received a tagged VLAN=0 packet, but we are sending
1204    * back untagged */
1205   clib_memcpy_fast (eth_tx, rewrite0, vec_len (rewrite0));
1206   vec_free (rewrite0);
1207
1208   return (next0);
1209 }
1210
/** Classification returned by arp_dst_fib_check(). */
enum arp_dst_fib_type
{
  ARP_DST_FIB_NONE = 0,	/* neither of the below */
  ARP_DST_FIB_ADJ = 1,	/* entry is sourced by FIB_SOURCE_ADJ */
  ARP_DST_FIB_CONN = 2	/* a source carries the CONNECTED flag */
};
1217
1218 /*
1219  * we're looking for FIB sources that indicate the destination
1220  * is attached. There may be interposed DPO prior to the one
1221  * we are looking for
1222  */
1223 static enum arp_dst_fib_type
1224 arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
1225 {
1226   const fib_entry_t *entry = fib_entry_get (fei);
1227   const fib_entry_src_t *entry_src;
1228   fib_source_t src;
1229   /* *INDENT-OFF* */
1230   FOR_EACH_SRC_ADDED(entry, entry_src, src,
1231   ({
1232     *flags = fib_entry_get_flags_for_source (fei, src);
1233     if (fib_entry_is_sourced (fei, FIB_SOURCE_ADJ))
1234         return ARP_DST_FIB_ADJ;
1235       else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
1236         return ARP_DST_FIB_CONN;
1237   }))
1238   /* *INDENT-ON* */
1239
1240   return ARP_DST_FIB_NONE;
1241 }
1242
1243 static uword
1244 arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1245 {
1246   ethernet_arp_main_t *am = &ethernet_arp_main;
1247   vnet_main_t *vnm = vnet_get_main ();
1248   u32 n_left_from, next_index, *from, *to_next;
1249   u32 n_replies_sent = 0;
1250
1251   from = vlib_frame_vector_args (frame);
1252   n_left_from = frame->n_vectors;
1253   next_index = node->cached_next_index;
1254
1255   if (node->flags & VLIB_NODE_FLAG_TRACE)
1256     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1257                                    /* stride */ 1,
1258                                    sizeof (ethernet_arp_input_trace_t));
1259
1260   while (n_left_from > 0)
1261     {
1262       u32 n_left_to_next;
1263
1264       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1265
1266       while (n_left_from > 0 && n_left_to_next > 0)
1267         {
1268           vlib_buffer_t *p0;
1269           ethernet_arp_header_t *arp0;
1270           ethernet_header_t *eth_rx;
1271           const ip4_address_t *if_addr0;
1272           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
1273           u8 dst_is_local0, is_vrrp_reply0;
1274           fib_node_index_t dst_fei, src_fei;
1275           const fib_prefix_t *pfx0;
1276           fib_entry_flag_t src_flags, dst_flags;
1277
1278           pi0 = from[0];
1279           to_next[0] = pi0;
1280           from += 1;
1281           to_next += 1;
1282           n_left_from -= 1;
1283           n_left_to_next -= 1;
1284
1285           p0 = vlib_get_buffer (vm, pi0);
1286           arp0 = vlib_buffer_get_current (p0);
1287           /* Fill in ethernet header. */
1288           eth_rx = ethernet_buffer_get_header (p0);
1289
1290           next0 = ARP_REPLY_NEXT_DROP;
1291           error0 = ETHERNET_ARP_ERROR_replies_sent;
1292           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1293
1294           /* Check that IP address is local and matches incoming interface. */
1295           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1296           if (~0 == fib_index0)
1297             {
1298               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1299               goto drop;
1300
1301             }
1302
1303           {
1304             /*
1305              * we're looking for FIB entries that indicate the source
1306              * is attached. There may be more specific non-attached
1307              * routes that match the source, but these do not influence
1308              * whether we respond to an ARP request, i.e. they do not
1309              * influence whether we are the correct way for the sender
1310              * to reach us, they only affect how we reach the sender.
1311              */
1312             fib_entry_t *src_fib_entry;
1313             const fib_prefix_t *pfx;
1314             fib_entry_src_t *src;
1315             fib_source_t source;
1316             int attached;
1317             int mask;
1318
1319             mask = 32;
1320             attached = 0;
1321
1322             do
1323               {
1324                 src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1325                                                 &arp0->
1326                                                 ip4_over_ethernet[0].ip4,
1327                                                 mask);
1328                 src_fib_entry = fib_entry_get (src_fei);
1329
1330                 /*
1331                  * It's possible that the source that provides the
1332                  * flags we need, or the flags we must not have,
1333                  * is not the best source, so check then all.
1334                  */
1335                 /* *INDENT-OFF* */
1336                 FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
1337                 ({
1338                   src_flags = fib_entry_get_flags_for_source (src_fei, source);
1339
1340                   /* Reject requests/replies with our local interface
1341                      address. */
1342                   if (FIB_ENTRY_FLAG_LOCAL & src_flags)
1343                     {
1344                       error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
1345                       /*
1346                        * When VPP has an interface whose address is also
1347                        * applied to a TAP interface on the host, then VPP's
1348                        * TAP interface will be unnumbered  to the 'real'
1349                        * interface and do proxy ARP from the host.
1350                        * The curious aspect of this setup is that ARP requests
1351                        * from the host will come from the VPP's own address.
1352                        * So don't drop immediately here, instead go see if this
1353                        * is a proxy ARP case.
1354                        */
1355                       goto next_feature;
1356                     }
1357                   /* A Source must also be local to subnet of matching
1358                    * interface address. */
1359                   if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
1360                       (FIB_ENTRY_FLAG_CONNECTED & src_flags))
1361                     {
1362                       attached = 1;
1363                       break;
1364                     }
1365                   /*
1366                    * else
1367                    *  The packet was sent from an address that is not
1368                    *  connected nor attached i.e. it is not from an
1369                    *  address that is covered by a link's sub-net,
1370                    *  nor is it a already learned host resp.
1371                    */
1372                 }));
1373                 /* *INDENT-ON* */
1374
1375                 /*
1376                  * shorter mask lookup for the next iteration.
1377                  */
1378                 pfx = fib_entry_get_prefix (src_fei);
1379                 mask = pfx->fp_len - 1;
1380
1381                 /*
1382                  * continue until we hit the default route or we find
1383                  * the attached we are looking for. The most likely
1384                  * outcome is we find the attached with the first source
1385                  * on the first lookup.
1386                  */
1387               }
1388             while (!attached &&
1389                    !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
1390
1391             if (!attached)
1392               {
1393                 /*
1394                  * the matching route is a not attached, i.e. it was
1395                  * added as a result of routing, rather than interface/ARP
1396                  * configuration. If the matching route is not a host route
1397                  * (i.e. a /32)
1398                  */
1399                 error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
1400                 goto drop;
1401               }
1402           }
1403
1404           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1405                                           &arp0->ip4_over_ethernet[1].ip4,
1406                                           32);
1407           switch (arp_dst_fib_check (dst_fei, &dst_flags))
1408             {
1409             case ARP_DST_FIB_ADJ:
1410               /*
1411                * We matched an adj-fib on ths source subnet (a /32 previously
1412                * added as a result of ARP). If this request is a gratuitous
1413                * ARP, then learn from it.
1414                * The check for matching an adj-fib, is to prevent hosts
1415                * from spamming us with gratuitous ARPS that might otherwise
1416                * blow our ARP cache
1417                */
1418               if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1419                   arp0->ip4_over_ethernet[1].ip4.as_u32)
1420                 error0 = arp_learn (vnm, am, sw_if_index0,
1421                                     &arp0->ip4_over_ethernet[0]);
1422               goto drop;
1423             case ARP_DST_FIB_CONN:
1424               /* destination is connected, continue to process */
1425               break;
1426             case ARP_DST_FIB_NONE:
1427               /* destination is not connected, stop here */
1428               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1429               goto next_feature;
1430             }
1431
1432           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
1433           pfx0 = fib_entry_get_prefix (dst_fei);
1434           if_addr0 = &pfx0->fp_addr.ip4;
1435
1436           is_vrrp_reply0 =
1437             ((arp0->opcode ==
1438               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
1439              &&
1440              (!memcmp
1441               (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix,
1442                sizeof (vrrp_prefix))));
1443
1444           /* Trash ARP packets whose ARP-level source addresses do not
1445              match their L2-frame-level source addresses, unless it's
1446              a reply from a VRRP virtual router */
1447           if (!ethernet_mac_address_equal
1448               (eth_rx->src_address,
1449                arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
1450             {
1451               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1452               goto drop;
1453             }
1454
1455           /* Learn or update sender's mapping only for replies to addresses
1456            * that are local to the subnet */
1457           if (arp0->opcode ==
1458               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
1459             {
1460               if (dst_is_local0)
1461                 error0 = arp_learn (vnm, am, sw_if_index0,
1462                                     &arp0->ip4_over_ethernet[0]);
1463               else
1464                 /* a reply for a non-local destination could be a GARP.
1465                  * GARPs for hosts we know were handled above, so this one
1466                  * we drop */
1467                 error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1468
1469               goto next_feature;
1470             }
1471           else if (arp0->opcode ==
1472                    clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request) &&
1473                    (dst_is_local0 == 0))
1474             {
1475               goto next_feature;
1476             }
1477
1478           /* Honor unnumbered interface, if any */
1479           conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
1480           if (sw_if_index0 != conn_sw_if_index0 ||
1481               sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
1482             {
1483               /*
1484                * The interface the ARP is sent to or was received on is not the
1485                * interface on which the covering prefix is configured.
1486                * Maybe this is a case for unnumbered.
1487                */
1488               if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
1489                 {
1490                   error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
1491                   goto drop;
1492                 }
1493             }
1494           if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1495               arp0->ip4_over_ethernet[1].ip4.as_u32)
1496             {
1497               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1498               goto drop;
1499             }
1500
1501           next0 = arp_mk_reply (vnm, p0, sw_if_index0,
1502                                 if_addr0, arp0, eth_rx);
1503
1504           /* We are going to reply to this request, so, in the absence of
1505              errors, learn the sender */
1506           if (!error0)
1507             error0 = arp_learn (vnm, am, sw_if_index0,
1508                                 &arp0->ip4_over_ethernet[1]);
1509
1510           n_replies_sent += 1;
1511           goto enqueue;
1512
1513         next_feature:
1514           vnet_feature_next (&next0, p0);
1515           goto enqueue;
1516
1517         drop:
1518           p0->error = node->errors[error0];
1519
1520         enqueue:
1521           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1522                                            n_left_to_next, pi0, next0);
1523         }
1524
1525       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1526     }
1527
1528   vlib_error_count (vm, node->node_index,
1529                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
1530
1531   return frame->n_vectors;
1532 }
1533
1534 static uword
1535 arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1536 {
1537   ethernet_arp_main_t *am = &ethernet_arp_main;
1538   vnet_main_t *vnm = vnet_get_main ();
1539   u32 n_left_from, next_index, *from, *to_next;
1540   u32 n_arp_replies_sent = 0;
1541
1542   from = vlib_frame_vector_args (frame);
1543   n_left_from = frame->n_vectors;
1544   next_index = node->cached_next_index;
1545
1546   if (node->flags & VLIB_NODE_FLAG_TRACE)
1547     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1548                                    /* stride */ 1,
1549                                    sizeof (ethernet_arp_input_trace_t));
1550
1551   while (n_left_from > 0)
1552     {
1553       u32 n_left_to_next;
1554
1555       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1556
1557       while (n_left_from > 0 && n_left_to_next > 0)
1558         {
1559           vlib_buffer_t *p0;
1560           ethernet_arp_header_t *arp0;
1561           ethernet_header_t *eth_rx;
1562           ip4_address_t proxy_src;
1563           u32 pi0, error0, next0, sw_if_index0, fib_index0;
1564           u8 is_request0;
1565           ethernet_proxy_arp_t *pa;
1566
1567           pi0 = from[0];
1568           to_next[0] = pi0;
1569           from += 1;
1570           to_next += 1;
1571           n_left_from -= 1;
1572           n_left_to_next -= 1;
1573
1574           p0 = vlib_get_buffer (vm, pi0);
1575           arp0 = vlib_buffer_get_current (p0);
1576           /* Fill in ethernet header. */
1577           eth_rx = ethernet_buffer_get_header (p0);
1578
1579           is_request0 = arp0->opcode
1580             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
1581
1582           error0 = ETHERNET_ARP_ERROR_replies_sent;
1583           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1584           next0 = ARP_REPLY_NEXT_DROP;
1585
1586           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1587           if (~0 == fib_index0)
1588             {
1589               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1590             }
1591
1592           if (0 == error0 && is_request0)
1593             {
1594               u32 this_addr = clib_net_to_host_u32
1595                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1596
1597               vec_foreach (pa, am->proxy_arps)
1598               {
1599                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr.as_u32);
1600                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr.as_u32);
1601
1602                 /* an ARP request hit in the proxy-arp table? */
1603                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1604                     (fib_index0 == pa->fib_index))
1605                   {
1606                     proxy_src.as_u32 =
1607                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1608
1609                     /*
1610                      * change the interface address to the proxied
1611                      */
1612                     n_arp_replies_sent++;
1613
1614                     next0 =
1615                       arp_mk_reply (vnm, p0, sw_if_index0, &proxy_src, arp0,
1616                                     eth_rx);
1617                   }
1618               }
1619             }
1620           else
1621             {
1622               p0->error = node->errors[error0];
1623             }
1624
1625           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1626                                            n_left_to_next, pi0, next0);
1627         }
1628
1629       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1630     }
1631
1632   vlib_error_count (vm, node->node_index,
1633                     ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
1634
1635   return frame->n_vectors;
1636 }
1637
1638 static char *ethernet_arp_error_strings[] = {
1639 #define _(sym,string) string,
1640   foreach_ethernet_arp_error
1641 #undef _
1642 };
1643
1644 /* *INDENT-OFF* */
1645
1646 VLIB_REGISTER_NODE (arp_input_node, static) =
1647 {
1648   .function = arp_input,
1649   .name = "arp-input",
1650   .vector_size = sizeof (u32),
1651   .n_errors = ETHERNET_ARP_N_ERROR,
1652   .error_strings = ethernet_arp_error_strings,
1653   .n_next_nodes = ARP_INPUT_N_NEXT,
1654   .next_nodes = {
1655     [ARP_INPUT_NEXT_DROP] = "error-drop",
1656     [ARP_INPUT_NEXT_DISABLED] = "arp-disabled",
1657   },
1658   .format_buffer = format_ethernet_arp_header,
1659   .format_trace = format_ethernet_arp_input_trace,
1660 };
1661
1662 VLIB_REGISTER_NODE (arp_disabled_node, static) =
1663 {
1664   .function = arp_disabled,
1665   .name = "arp-disabled",
1666   .vector_size = sizeof (u32),
1667   .n_errors = ARP_DISABLED_N_ERROR,
1668   .error_strings = arp_disabled_error_strings,
1669   .n_next_nodes = ARP_DISABLED_N_NEXT,
1670   .next_nodes = {
1671     [ARP_INPUT_NEXT_DROP] = "error-drop",
1672   },
1673   .format_buffer = format_ethernet_arp_header,
1674   .format_trace = format_ethernet_arp_input_trace,
1675 };
1676
1677 VLIB_REGISTER_NODE (arp_reply_node, static) =
1678 {
1679   .function = arp_reply,
1680   .name = "arp-reply",
1681   .vector_size = sizeof (u32),
1682   .n_errors = ETHERNET_ARP_N_ERROR,
1683   .error_strings = ethernet_arp_error_strings,
1684   .n_next_nodes = ARP_REPLY_N_NEXT,
1685   .next_nodes = {
1686     [ARP_REPLY_NEXT_DROP] = "error-drop",
1687     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1688   },
1689   .format_buffer = format_ethernet_arp_header,
1690   .format_trace = format_ethernet_arp_input_trace,
1691 };
1692
1693 VLIB_REGISTER_NODE (arp_proxy_node, static) =
1694 {
1695   .function = arp_proxy,
1696   .name = "arp-proxy",
1697   .vector_size = sizeof (u32),
1698   .n_errors = ETHERNET_ARP_N_ERROR,
1699   .error_strings = ethernet_arp_error_strings,
1700   .n_next_nodes = ARP_REPLY_N_NEXT,
1701   .next_nodes = {
1702     [ARP_REPLY_NEXT_DROP] = "error-drop",
1703     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1704   },
1705   .format_buffer = format_ethernet_arp_header,
1706   .format_trace = format_ethernet_arp_input_trace,
1707 };
1708
1709 /* Built-in ARP rx feature path definition */
1710 VNET_FEATURE_ARC_INIT (arp_feat, static) =
1711 {
1712   .arc_name = "arp",
1713   .start_nodes = VNET_FEATURES ("arp-input"),
1714   .last_in_arc = "error-drop",
1715   .arc_index_ptr = &ethernet_arp_main.feature_arc_index,
1716 };
1717
1718 VNET_FEATURE_INIT (arp_reply_feat_node, static) =
1719 {
1720   .arc_name = "arp",
1721   .node_name = "arp-reply",
1722   .runs_before = VNET_FEATURES ("arp-disabled"),
1723 };
1724
1725 VNET_FEATURE_INIT (arp_proxy_feat_node, static) =
1726 {
1727   .arc_name = "arp",
1728   .node_name = "arp-proxy",
1729   .runs_after = VNET_FEATURES ("arp-reply"),
1730   .runs_before = VNET_FEATURES ("arp-disabled"),
1731 };
1732
1733 VNET_FEATURE_INIT (arp_disabled_feat_node, static) =
1734 {
1735   .arc_name = "arp",
1736   .node_name = "arp-disabled",
1737   .runs_before = VNET_FEATURES ("error-drop"),
1738 };
1739
1740 VNET_FEATURE_INIT (arp_drop_feat_node, static) =
1741 {
1742   .arc_name = "arp",
1743   .node_name = "error-drop",
1744   .runs_before = 0,     /* last feature */
1745 };
1746
1747 /* *INDENT-ON* */
1748
1749 static int
1750 ip4_arp_entry_sort (void *a1, void *a2)
1751 {
1752   ethernet_arp_ip4_entry_t *e1 = a1;
1753   ethernet_arp_ip4_entry_t *e2 = a2;
1754
1755   int cmp;
1756   vnet_main_t *vnm = vnet_get_main ();
1757
1758   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1759   if (!cmp)
1760     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1761   return cmp;
1762 }
1763
1764 ethernet_arp_ip4_entry_t *
1765 ip4_neighbors_pool (void)
1766 {
1767   ethernet_arp_main_t *am = &ethernet_arp_main;
1768   return am->ip4_entry_pool;
1769 }
1770
1771 ethernet_arp_ip4_entry_t *
1772 ip4_neighbor_entries (u32 sw_if_index)
1773 {
1774   ethernet_arp_main_t *am = &ethernet_arp_main;
1775   ethernet_arp_ip4_entry_t *n, *ns = 0;
1776
1777   /* *INDENT-OFF* */
1778   pool_foreach (n, am->ip4_entry_pool, ({
1779     if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
1780       continue;
1781     vec_add1 (ns, n[0]);
1782   }));
1783   /* *INDENT-ON* */
1784
1785   if (ns)
1786     vec_sort_with_function (ns, ip4_arp_entry_sort);
1787   return ns;
1788 }
1789
1790 static clib_error_t *
1791 show_ip4_arp (vlib_main_t * vm,
1792               unformat_input_t * input, vlib_cli_command_t * cmd)
1793 {
1794   vnet_main_t *vnm = vnet_get_main ();
1795   ethernet_arp_main_t *am = &ethernet_arp_main;
1796   ethernet_arp_ip4_entry_t *e, *es;
1797   ethernet_proxy_arp_t *pa;
1798   clib_error_t *error = 0;
1799   u32 sw_if_index;
1800
1801   /* Filter entries by interface if given. */
1802   sw_if_index = ~0;
1803   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1804
1805   es = ip4_neighbor_entries (sw_if_index);
1806   if (es)
1807     {
1808       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1809       vec_foreach (e, es)
1810       {
1811         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1812       }
1813       vec_free (es);
1814     }
1815
1816   if (vec_len (am->proxy_arps))
1817     {
1818       vlib_cli_output (vm, "Proxy arps enabled for:");
1819       vec_foreach (pa, am->proxy_arps)
1820       {
1821         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1822                          pa->fib_index,
1823                          format_ip4_address, &pa->lo_addr,
1824                          format_ip4_address, &pa->hi_addr);
1825       }
1826     }
1827
1828   return error;
1829 }
1830
1831 /*?
1832  * Display all the IPv4 ARP entries.
1833  *
1834  * @cliexpar
1835  * Example of how to display the IPv4 ARP table:
1836  * @cliexstart{show ip arp}
1837  *    Time      FIB        IP4       Flags      Ethernet              Interface
1838  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1839  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1840  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1841  * Proxy arps enabled for:
1842  * Fib_index 0   6.0.0.1 - 6.0.0.11
1843  * @cliexend
1844  ?*/
1845 /* *INDENT-OFF* */
1846 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1847   .path = "show ip arp",
1848   .function = show_ip4_arp,
1849   .short_help = "show ip arp",
1850 };
1851 /* *INDENT-ON* */
1852
1853 typedef struct
1854 {
1855   pg_edit_t l2_type, l3_type;
1856   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1857   pg_edit_t opcode;
1858   struct
1859   {
1860     pg_edit_t mac;
1861     pg_edit_t ip4;
1862   } ip4_over_ethernet[2];
1863 } pg_ethernet_arp_header_t;
1864
1865 static inline void
1866 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1867 {
1868   /* Initialize fields that are not bit fields in the IP header. */
1869 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1870   _(l2_type);
1871   _(l3_type);
1872   _(n_l2_address_bytes);
1873   _(n_l3_address_bytes);
1874   _(opcode);
1875   _(ip4_over_ethernet[0].mac);
1876   _(ip4_over_ethernet[0].ip4);
1877   _(ip4_over_ethernet[1].mac);
1878   _(ip4_over_ethernet[1].ip4);
1879 #undef _
1880 }
1881
1882 uword
1883 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1884 {
1885   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1886   pg_ethernet_arp_header_t *p;
1887   u32 group_index;
1888
1889   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1890                             &group_index);
1891   pg_ethernet_arp_header_init (p);
1892
1893   /* Defaults. */
1894   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1895   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1896   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1897   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1898
1899   if (!unformat (input, "%U: %U/%U -> %U/%U",
1900                  unformat_pg_edit,
1901                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1902                  unformat_pg_edit,
1903                  unformat_mac_address_t, &p->ip4_over_ethernet[0].mac,
1904                  unformat_pg_edit,
1905                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1906                  unformat_pg_edit,
1907                  unformat_mac_address_t, &p->ip4_over_ethernet[1].mac,
1908                  unformat_pg_edit,
1909                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1910     {
1911       /* Free up any edits we may have added. */
1912       pg_free_edit_group (s);
1913       return 0;
1914     }
1915   return 1;
1916 }
1917
1918 clib_error_t *
1919 ip4_set_arp_limit (u32 arp_limit)
1920 {
1921   ethernet_arp_main_t *am = &ethernet_arp_main;
1922
1923   am->limit_arp_cache_size = arp_limit;
1924   return 0;
1925 }
1926
1927 /**
1928  * @brief Control Plane hook to remove an ARP entry
1929  */
1930 int
1931 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1932                                   u32 sw_if_index,
1933                                   const
1934                                   ethernet_arp_ip4_over_ethernet_address_t *
1935                                   a)
1936 {
1937   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1938     .sw_if_index = sw_if_index,
1939     .flags = ETHERNET_ARP_ARGS_REMOVE,
1940     .ip4 = a->ip4,
1941     .mac = a->mac,
1942   };
1943
1944   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1945                                (u8 *) & args, sizeof (args));
1946   return 0;
1947 }
1948
1949 /**
1950  * @brief publish wildcard arp event
1951  * @param sw_if_index The interface on which the ARP entries are acted
1952  */
1953 static int
1954 vnet_arp_wc_publish (u32 sw_if_index,
1955                      const ethernet_arp_ip4_over_ethernet_address_t * a)
1956 {
1957   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1958     .flags = ETHERNET_ARP_ARGS_WC_PUB,
1959     .sw_if_index = sw_if_index,
1960     .ip4 = a->ip4,
1961     .mac = a->mac,
1962   };
1963
1964   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1965                                (u8 *) & args, sizeof (args));
1966   return 0;
1967 }
1968
1969 static void
1970 vnet_arp_wc_publish_internal (vnet_main_t * vnm,
1971                               vnet_arp_set_ip4_over_ethernet_rpc_args_t *
1972                               args)
1973 {
1974   vlib_main_t *vm = vlib_get_main ();
1975   ethernet_arp_main_t *am = &ethernet_arp_main;
1976   uword ni = am->wc_ip4_arp_publisher_node;
1977   uword et = am->wc_ip4_arp_publisher_et;
1978
1979   if (ni == (uword) ~ 0)
1980     return;
1981   wc_arp_report_t *r =
1982     vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r);
1983   r->ip.as_u32 = args->ip4.as_u32;
1984   r->sw_if_index = args->sw_if_index;
1985   mac_address_copy (&r->mac, &args->mac);
1986 }
1987
1988 void
1989 wc_arp_set_publisher_node (uword node_index, uword event_type)
1990 {
1991   ethernet_arp_main_t *am = &ethernet_arp_main;
1992   am->wc_ip4_arp_publisher_node = node_index;
1993   am->wc_ip4_arp_publisher_et = event_type;
1994 }
1995
1996 static void
1997 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e);
1998
1999 static int
2000 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
2001                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
2002                                            * args)
2003 {
2004   ethernet_arp_main_t *am = &ethernet_arp_main;
2005   ethernet_arp_ip4_entry_t *e;
2006   ethernet_arp_interface_t *eai;
2007
2008   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
2009     return 0;
2010
2011   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2012
2013   e = arp_entry_find (eai, &args->ip4);
2014
2015   if (NULL != e)
2016     {
2017       adj_nbr_walk_nh4 (e->sw_if_index,
2018                         &e->ip4_address, arp_mk_incomplete_walk, e);
2019
2020       /*
2021        * The difference between flush and unset, is that an unset
2022        * means delete for static and dynamic entries. A flush
2023        * means delete only for dynamic. Flushing is what the DP
2024        * does in response to interface events. unset is only done
2025        * by the control plane.
2026        */
2027       if (e->flags & IP_NEIGHBOR_FLAG_STATIC)
2028         {
2029           e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
2030         }
2031       else if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC)
2032         {
2033           arp_entry_free (eai, e);
2034         }
2035     }
2036   return (0);
2037 }
2038
2039 /*
2040  * arp_add_del_interface_address
2041  *
2042  * callback when an interface address is added or deleted
2043  */
2044 static void
2045 arp_enable_disable_interface (ip4_main_t * im,
2046                               uword opaque, u32 sw_if_index, u32 is_enable)
2047 {
2048   ethernet_arp_main_t *am = &ethernet_arp_main;
2049
2050   if (is_enable)
2051     arp_enable (am, sw_if_index);
2052   else
2053     arp_disable (am, sw_if_index);
2054 }
2055
2056 /*
2057  * arp_add_del_interface_address
2058  *
2059  * callback when an interface address is added or deleted
2060  */
2061 static void
2062 arp_add_del_interface_address (ip4_main_t * im,
2063                                uword opaque,
2064                                u32 sw_if_index,
2065                                ip4_address_t * address,
2066                                u32 address_length,
2067                                u32 if_address_index, u32 is_del)
2068 {
2069   /*
2070    * Flush the ARP cache of all entries covered by the address
2071    * that is being removed.
2072    */
2073   ethernet_arp_main_t *am = &ethernet_arp_main;
2074   ethernet_arp_ip4_entry_t *e;
2075
2076   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
2077     return;
2078
2079   if (is_del)
2080     {
2081       ethernet_arp_interface_t *eai;
2082       u32 i, *to_delete = 0;
2083       hash_pair_t *pair;
2084
2085       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2086
2087       /* *INDENT-OFF* */
2088       hash_foreach_pair (pair, eai->arp_entries,
2089       ({
2090         e = pool_elt_at_index(am->ip4_entry_pool,
2091                               pair->value[0]);
2092         if (ip4_destination_matches_route (im, &e->ip4_address,
2093                                            address, address_length))
2094           {
2095             vec_add1 (to_delete, e - am->ip4_entry_pool);
2096           }
2097       }));
2098       /* *INDENT-ON* */
2099
2100       for (i = 0; i < vec_len (to_delete); i++)
2101         {
2102           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
2103
2104           vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
2105             .ip4.as_u32 = e->ip4_address.as_u32,
2106             .sw_if_index = e->sw_if_index,
2107             .flags = ETHERNET_ARP_ARGS_FLUSH,
2108           };
2109           mac_address_copy (&delme.mac, &e->mac);
2110
2111           vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (),
2112                                                      &delme);
2113         }
2114
2115       vec_free (to_delete);
2116     }
2117 }
2118
2119 static void
2120 arp_table_bind (ip4_main_t * im,
2121                 uword opaque,
2122                 u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2123 {
2124   ethernet_arp_main_t *am = &ethernet_arp_main;
2125   ethernet_arp_interface_t *eai;
2126   ethernet_arp_ip4_entry_t *e;
2127   hash_pair_t *pair;
2128
2129   /*
2130    * the IP table that the interface is bound to has changed.
2131    * reinstall all the adj fibs.
2132    */
2133
2134   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
2135     return;
2136
2137   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2138
2139   /* *INDENT-OFF* */
2140   hash_foreach_pair (pair, eai->arp_entries,
2141   ({
2142     e = pool_elt_at_index(am->ip4_entry_pool,
2143                           pair->value[0]);
2144     /*
2145      * remove the adj-fib from the old table and add to the new
2146      */
2147     arp_adj_fib_remove(e, old_fib_index);
2148     arp_adj_fib_add(e, new_fib_index);
2149   }));
2150   /* *INDENT-ON* */
2151
2152 }
2153
2154 static clib_error_t *
2155 ethernet_arp_init (vlib_main_t * vm)
2156 {
2157   ethernet_arp_main_t *am = &ethernet_arp_main;
2158   ip4_main_t *im = &ip4_main;
2159   pg_node_t *pn;
2160
2161   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
2162
2163   pn = pg_get_node (arp_input_node.index);
2164   pn->unformat_edit = unformat_pg_arp_header;
2165
2166   am->opcode_by_name = hash_create_string (0, sizeof (uword));
2167 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
2168   foreach_ethernet_arp_opcode;
2169 #undef _
2170
2171   /* $$$ configurable */
2172   am->limit_arp_cache_size = 50000;
2173
2174   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
2175   am->mac_changes_by_address = hash_create (0, sizeof (uword));
2176   am->wc_ip4_arp_publisher_node = (uword) ~ 0;
2177
2178   /* don't trace ARP error packets */
2179   {
2180     vlib_node_runtime_t *rt =
2181       vlib_node_get_runtime (vm, arp_input_node.index);
2182
2183 #define _(a,b)                                  \
2184     vnet_pcap_drop_trace_filter_add_del         \
2185         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
2186          1 /* is_add */);
2187     foreach_ethernet_arp_error
2188 #undef _
2189   }
2190
2191   ip4_add_del_interface_address_callback_t cb;
2192   cb.function = arp_add_del_interface_address;
2193   cb.function_opaque = 0;
2194   vec_add1 (im->add_del_interface_address_callbacks, cb);
2195
2196   ip4_enable_disable_interface_callback_t cbe;
2197   cbe.function = arp_enable_disable_interface;
2198   cbe.function_opaque = 0;
2199   vec_add1 (im->enable_disable_interface_callbacks, cbe);
2200
2201   ip4_table_bind_callback_t cbt;
2202   cbt.function = arp_table_bind;
2203   cbt.function_opaque = 0;
2204   vec_add1 (im->table_bind_callbacks, cbt);
2205
2206   return 0;
2207 }
2208 /* *INDENT-OFF* */
2209 VLIB_INIT_FUNCTION (ethernet_arp_init) =
2210 {
2211   .runs_after = VLIB_INITS("ethernet_init"),
2212 };
2213 /* *INDENT-ON* */
2214
2215 static void
2216 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
2217 {
2218   ethernet_arp_main_t *am = &ethernet_arp_main;
2219
2220   arp_adj_fib_remove
2221     (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
2222   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
2223   pool_put (am->ip4_entry_pool, e);
2224 }
2225
2226 static inline int
2227 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
2228                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
2229                                            * args)
2230 {
2231   ethernet_arp_main_t *am = &ethernet_arp_main;
2232   ethernet_arp_ip4_entry_t *e;
2233   ethernet_arp_interface_t *eai;
2234
2235   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
2236     return 0;
2237
2238   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2239
2240   e = arp_entry_find (eai, &args->ip4);
2241
2242   if (NULL != e)
2243     {
2244       adj_nbr_walk_nh4 (e->sw_if_index,
2245                         &e->ip4_address, arp_mk_incomplete_walk, e);
2246       arp_entry_free (eai, e);
2247     }
2248
2249   return 0;
2250 }
2251
2252
2253 static int
2254 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
2255                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
2256                                               * args)
2257 {
2258   ethernet_arp_main_t *am = &ethernet_arp_main;
2259   ethernet_arp_ip4_entry_t *e;
2260   ethernet_arp_interface_t *eai;
2261
2262   arp_enable (am, args->sw_if_index);
2263   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2264
2265   e = arp_entry_find (eai, &args->ip4);
2266
2267   if (NULL != e)
2268     {
2269       adj_nbr_walk_nh4 (e->sw_if_index,
2270                         &e->ip4_address, arp_mk_complete_walk, e);
2271     }
2272   return (0);
2273 }
2274
2275 static void
2276 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
2277                                     * a)
2278 {
2279   vnet_main_t *vm = vnet_get_main ();
2280   ASSERT (vlib_get_thread_index () == 0);
2281
2282   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
2283     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
2284   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
2285     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
2286   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
2287     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
2288   else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB)
2289     vnet_arp_wc_publish_internal (vm, a);
2290   else
2291     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
2292 }
2293
2294 /**
2295  * @brief Invoked when the interface's admin state changes
2296  */
2297 static clib_error_t *
2298 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
2299                                    u32 sw_if_index, u32 flags)
2300 {
2301   ethernet_arp_main_t *am = &ethernet_arp_main;
2302   ethernet_arp_ip4_entry_t *e;
2303   u32 i, *to_update = 0;
2304
2305   /* *INDENT-OFF* */
2306   pool_foreach (e, am->ip4_entry_pool,
2307   ({
2308     if (e->sw_if_index == sw_if_index)
2309       vec_add1 (to_update,
2310                 e - am->ip4_entry_pool);
2311   }));
2312   /* *INDENT-ON* */
2313
2314   for (i = 0; i < vec_len (to_update); i++)
2315     {
2316       e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
2317
2318       vnet_arp_set_ip4_over_ethernet_rpc_args_t update_me = {
2319         .ip4.as_u32 = e->ip4_address.as_u32,
2320         .sw_if_index = e->sw_if_index,
2321       };
2322       mac_address_copy (&update_me.mac, &e->mac);
2323
2324       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
2325         {
2326           update_me.flags = ETHERNET_ARP_ARGS_POPULATE;
2327           vnet_arp_populate_ip4_over_ethernet_internal (vnm, &update_me);
2328         }
2329       else
2330         {
2331           update_me.flags = ETHERNET_ARP_ARGS_FLUSH;
2332           vnet_arp_flush_ip4_over_ethernet_internal (vnm, &update_me);
2333         }
2334     }
2335   vec_free (to_update);
2336
2337   return 0;
2338 }
2339
2340 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
2341
2342 static void
2343 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
2344 {
2345   u8 old;
2346   int i;
2347
2348   for (i = 3; i >= 0; i--)
2349     {
2350       old = a->ip4.as_u8[i];
2351       a->ip4.as_u8[i] += 1;
2352       if (old < a->ip4.as_u8[i])
2353         break;
2354     }
2355
2356   for (i = 5; i >= 0; i--)
2357     {
2358       old = a->mac.bytes[i];
2359       a->mac.bytes[i] += 1;
2360       if (old < a->mac.bytes[i])
2361         break;
2362     }
2363 }
2364
2365 int
2366 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
2367                                 u32 sw_if_index,
2368                                 const ethernet_arp_ip4_over_ethernet_address_t
2369                                 * a, ip_neighbor_flags_t flags)
2370 {
2371   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
2372     .sw_if_index = sw_if_index,
2373     .nbr_flags = flags,
2374     .flags = 0,
2375     .ip4.as_u32 = a->ip4.as_u32,
2376     .mac = a->mac,
2377   };
2378
2379   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
2380                                (u8 *) & args, sizeof (args));
2381   return 0;
2382 }
2383
2384 void
2385 proxy_arp_walk (proxy_arp_walk_t cb, void *data)
2386 {
2387   ethernet_arp_main_t *am = &ethernet_arp_main;
2388   ethernet_proxy_arp_t *pa;
2389
2390   vec_foreach (pa, am->proxy_arps)
2391   {
2392     if (!cb (&pa->lo_addr, &pa->hi_addr, pa->fib_index, data))
2393       break;
2394   }
2395 }
2396
2397 int
2398 vnet_proxy_arp_enable_disable (vnet_main_t * vnm, u32 sw_if_index, u8 enable)
2399 {
2400   ethernet_arp_main_t *am = &ethernet_arp_main;
2401   ethernet_arp_interface_t *eai;
2402
2403   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
2404
2405   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2406
2407   if (enable)
2408     {
2409       if (!eai->proxy_enabled)
2410         {
2411           vnet_feature_enable_disable ("arp", "arp-proxy",
2412                                        sw_if_index, 1, NULL, 0);
2413         }
2414       eai->proxy_enabled = 1;
2415     }
2416   else
2417     {
2418       if (eai->proxy_enabled)
2419         {
2420           vnet_feature_enable_disable ("arp", "arp-proxy",
2421                                        sw_if_index, 0, NULL, 0);
2422         }
2423       eai->proxy_enabled = 0;
2424     }
2425
2426   return (0);
2427 }
2428
2429 int
2430 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
2431                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
2432 {
2433   ethernet_arp_main_t *am = &ethernet_arp_main;
2434   ethernet_proxy_arp_t *pa;
2435   u32 found_at_index = ~0;
2436
2437   vec_foreach (pa, am->proxy_arps)
2438   {
2439     if (pa->lo_addr.as_u32 == lo_addr->as_u32 &&
2440         pa->hi_addr.as_u32 == hi_addr->as_u32 && pa->fib_index == fib_index)
2441       {
2442         found_at_index = pa - am->proxy_arps;
2443         break;
2444       }
2445   }
2446
2447   if (found_at_index != ~0)
2448     {
2449       /* Delete, otherwise it's already in the table */
2450       if (is_del)
2451         vec_delete (am->proxy_arps, 1, found_at_index);
2452       return 0;
2453     }
2454   /* delete, no such entry */
2455   if (is_del)
2456     return VNET_API_ERROR_NO_SUCH_ENTRY;
2457
2458   /* add, not in table */
2459   vec_add2 (am->proxy_arps, pa, 1);
2460   pa->lo_addr.as_u32 = lo_addr->as_u32;
2461   pa->hi_addr.as_u32 = hi_addr->as_u32;
2462   pa->fib_index = fib_index;
2463   return 0;
2464 }
2465
2466 void
2467 proxy_arp_intfc_walk (proxy_arp_intf_walk_t cb, void *data)
2468 {
2469   ethernet_arp_main_t *am = &ethernet_arp_main;
2470   ethernet_arp_interface_t *eai;
2471
2472   vec_foreach (eai, am->ethernet_arp_by_sw_if_index)
2473   {
2474     if (eai->proxy_enabled)
2475       cb (eai - am->ethernet_arp_by_sw_if_index, data);
2476   }
2477 }
2478
2479 static clib_error_t *
2480 ip_arp_add_del_command_fn (vlib_main_t * vm,
2481                            unformat_input_t * input, vlib_cli_command_t * cmd)
2482 {
2483   vnet_main_t *vnm = vnet_get_main ();
2484   u32 sw_if_index;
2485   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
2486   int addr_valid = 0;
2487   int is_del = 0;
2488   int count = 1;
2489   u32 fib_index = 0;
2490   u32 fib_id;
2491   int is_proxy = 0;
2492   ip_neighbor_flags_t flags;
2493
2494   flags = IP_NEIGHBOR_FLAG_NONE;
2495
2496   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2497     {
2498       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
2499       if (unformat (input, "%U %U %U",
2500                     unformat_vnet_sw_interface, vnm, &sw_if_index,
2501                     unformat_ip4_address, &addr.ip4,
2502                     unformat_mac_address_t, &addr.mac))
2503         addr_valid = 1;
2504
2505       else if (unformat (input, "delete") || unformat (input, "del"))
2506         is_del = 1;
2507
2508       else if (unformat (input, "static"))
2509         flags |= IP_NEIGHBOR_FLAG_STATIC;
2510
2511       else if (unformat (input, "no-fib-entry"))
2512         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
2513
2514       else if (unformat (input, "count %d", &count))
2515         ;
2516
2517       else if (unformat (input, "fib-id %d", &fib_id))
2518         {
2519           fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
2520
2521           if (~0 == fib_index)
2522             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
2523         }
2524
2525       else if (unformat (input, "proxy %U - %U",
2526                          unformat_ip4_address, &lo_addr.ip4,
2527                          unformat_ip4_address, &hi_addr.ip4))
2528         is_proxy = 1;
2529       else
2530         break;
2531     }
2532
2533   if (is_proxy)
2534     {
2535       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
2536                                      fib_index, is_del);
2537       return 0;
2538     }
2539
2540   if (addr_valid)
2541     {
2542       int i;
2543
2544       for (i = 0; i < count; i++)
2545         {
2546           if (is_del == 0)
2547             {
2548               uword event_type, *event_data = 0;
2549
2550               /* Park the debug CLI until the arp entry is installed */
2551               vnet_register_ip4_arp_resolution_event
2552                 (vnm, &addr.ip4, vlib_current_process (vm),
2553                  1 /* type */ , 0 /* data */ );
2554
2555               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, &addr, flags);
2556
2557               vlib_process_wait_for_event (vm);
2558               event_type = vlib_process_get_events (vm, &event_data);
2559               vec_reset_length (event_data);
2560               if (event_type != 1)
2561                 clib_warning ("event type %d unexpected", event_type);
2562             }
2563           else
2564             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
2565
2566           increment_ip4_and_mac_address (&addr);
2567         }
2568     }
2569   else
2570     {
2571       return clib_error_return (0, "unknown input `%U'",
2572                                 format_unformat_error, input);
2573     }
2574
2575   return 0;
2576 }
2577
2578 /* *INDENT-OFF* */
2579 /*?
2580  * Add or delete IPv4 ARP cache entries.
2581  *
2582  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
2583  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
2584  * any order and combination.
2585  *
2586  * @cliexpar
2587  * @parblock
2588  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
2589  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
2590  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2591  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
2592  *
2593  * To add or delete an IPv4 ARP cache entry to or from a specific fib
2594  * table:
2595  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2596  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2597  *
2598  * Add or delete IPv4 static ARP cache entries as follows:
2599  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2600  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2601  *
2602  * For testing / debugging purposes, the 'set ip arp' command can add or
2603  * delete multiple entries. Supply the 'count N' parameter:
2604  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2605  * @endparblock
2606  ?*/
2607 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
2608   .path = "set ip arp",
2609   .short_help =
2610   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
2611   .function = ip_arp_add_del_command_fn,
2612 };
2613 /* *INDENT-ON* */
2614
2615 static clib_error_t *
2616 set_int_proxy_arp_command_fn (vlib_main_t * vm,
2617                               unformat_input_t *
2618                               input, vlib_cli_command_t * cmd)
2619 {
2620   vnet_main_t *vnm = vnet_get_main ();
2621   u32 sw_if_index;
2622   int enable = 0;
2623
2624   sw_if_index = ~0;
2625
2626   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2627     {
2628       if (unformat (input, "%U", unformat_vnet_sw_interface,
2629                     vnm, &sw_if_index))
2630         ;
2631       else if (unformat (input, "enable") || unformat (input, "on"))
2632         enable = 1;
2633       else if (unformat (input, "disable") || unformat (input, "off"))
2634         enable = 0;
2635       else
2636         break;
2637     }
2638
2639   if (~0 == sw_if_index)
2640     return clib_error_return (0, "unknown input '%U'",
2641                               format_unformat_error, input);
2642
2643   vnet_proxy_arp_enable_disable (vnm, sw_if_index, enable);
2644
2645   return 0;
2646 }
2647
2648 /* *INDENT-OFF* */
2649 /*?
2650  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2651  * requests for the indicated address range. Multiple proxy-arp
2652  * ranges may be provisioned.
2653  *
2654  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2655  * Also, the underlying implementation has not been performance-tuned.
2656  * Avoid creating an unnecessarily large set of ranges.
2657  *
2658  * @cliexpar
2659  * To enable proxy arp on a range of addresses, use:
2660  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2661  * Append 'del' to delete a range of proxy ARP addresses:
2662  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2663  * You must then specifically enable proxy arp on individual interfaces:
2664  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2665  * To disable proxy arp on an individual interface:
2666  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2667  ?*/
2668 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2669   .path = "set interface proxy-arp",
2670   .short_help =
2671   "set interface proxy-arp <intfc> [enable|disable]",
2672   .function = set_int_proxy_arp_command_fn,
2673 };
2674 /* *INDENT-ON* */
2675
2676
2677 /*
2678  * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
2679  * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
2680  */
2681 typedef enum
2682 {
2683   ARP_TERM_NEXT_L2_OUTPUT,
2684   ARP_TERM_NEXT_DROP,
2685   ARP_TERM_N_NEXT,
2686 } arp_term_next_t;
2687
2688 u32 arp_term_next_node_index[32];
2689
2690 static uword
2691 arp_term_l2bd (vlib_main_t * vm,
2692                vlib_node_runtime_t * node, vlib_frame_t * frame)
2693 {
2694   l2input_main_t *l2im = &l2input_main;
2695   u32 n_left_from, next_index, *from, *to_next;
2696   u32 n_replies_sent = 0;
2697   u16 last_bd_index = ~0;
2698   l2_bridge_domain_t *last_bd_config = 0;
2699   l2_input_config_t *cfg0;
2700
2701   from = vlib_frame_vector_args (frame);
2702   n_left_from = frame->n_vectors;
2703   next_index = node->cached_next_index;
2704
2705   while (n_left_from > 0)
2706     {
2707       u32 n_left_to_next;
2708
2709       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2710
2711       while (n_left_from > 0 && n_left_to_next > 0)
2712         {
2713           vlib_buffer_t *p0;
2714           ethernet_header_t *eth0;
2715           ethernet_arp_header_t *arp0;
2716           ip6_header_t *iph0;
2717           u8 *l3h0;
2718           u32 pi0, error0, next0, sw_if_index0;
2719           u16 ethertype0;
2720           u16 bd_index0;
2721           u32 ip0;
2722           u8 *macp0;
2723
2724           pi0 = from[0];
2725           to_next[0] = pi0;
2726           from += 1;
2727           to_next += 1;
2728           n_left_from -= 1;
2729           n_left_to_next -= 1;
2730
2731           p0 = vlib_get_buffer (vm, pi0);
2732           // Terminate only local (SHG == 0) ARP
2733           if (vnet_buffer (p0)->l2.shg != 0)
2734             goto next_l2_feature;
2735
2736           eth0 = vlib_buffer_get_current (p0);
2737           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2738           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2739           arp0 = (ethernet_arp_header_t *) l3h0;
2740
2741           if (ethertype0 != ETHERNET_TYPE_ARP)
2742             goto check_ip6_nd;
2743
2744           if ((arp0->opcode !=
2745                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
2746               (arp0->opcode !=
2747                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
2748             goto check_ip6_nd;
2749
2750           /* Must be ARP request/reply packet here */
2751           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2752                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2753             {
2754               u8 *t0 = vlib_add_trace (vm, node, p0,
2755                                        sizeof (ethernet_arp_input_trace_t));
2756               clib_memcpy_fast (t0, l3h0,
2757                                 sizeof (ethernet_arp_input_trace_t));
2758             }
2759
2760           error0 = 0;
2761           error0 =
2762             (arp0->l2_type !=
2763              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2764              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2765           error0 =
2766             (arp0->l3_type !=
2767              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2768              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2769
2770           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2771
2772           if (error0)
2773             goto drop;
2774
2775           /* Trash ARP packets whose ARP-level source addresses do not
2776              match, or if requester address is mcast */
2777           if (PREDICT_FALSE
2778               (!ethernet_mac_address_equal (eth0->src_address,
2779                                             arp0->ip4_over_ethernet[0].
2780                                             mac.bytes))
2781               || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
2782             {
2783               /* VRRP virtual MAC may be different to SMAC in ARP reply */
2784               if (!ethernet_mac_address_equal
2785                   (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix))
2786                 {
2787                   error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2788                   goto drop;
2789                 }
2790             }
2791           if (PREDICT_FALSE
2792               (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
2793             {
2794               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
2795               goto drop;
2796             }
2797
2798           /* Check if anyone want ARP request events for L2 BDs */
2799           {
2800             ethernet_arp_main_t *am = &ethernet_arp_main;
2801             if (am->wc_ip4_arp_publisher_node != (uword) ~ 0)
2802               vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]);
2803           }
2804
2805           /* lookup BD mac_by_ip4 hash table for MAC entry */
2806           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2807           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2808           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2809                              || (last_bd_index == (u16) ~ 0)))
2810             {
2811               last_bd_index = bd_index0;
2812               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2813             }
2814           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2815
2816           if (PREDICT_FALSE (!macp0))
2817             goto next_l2_feature;       /* MAC not found */
2818           if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2819                              arp0->ip4_over_ethernet[1].ip4.as_u32))
2820             goto next_l2_feature;       /* GARP */
2821
2822           /* MAC found, send ARP reply -
2823              Convert ARP request packet to ARP reply */
2824           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2825           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2826           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2827           mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
2828           clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
2829           clib_memcpy_fast (eth0->src_address, macp0, 6);
2830           n_replies_sent += 1;
2831
2832         output_response:
2833           /* For BVI, need to use l2-fwd node to send ARP reply as
2834              l2-output node cannot output packet to BVI properly */
2835           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2836           if (PREDICT_FALSE (cfg0->bvi))
2837             {
2838               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2839               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2840               goto next_l2_feature;
2841             }
2842
2843           /* Send ARP/ND reply back out input interface through l2-output */
2844           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2845           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2846           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2847                                            to_next, n_left_to_next, pi0,
2848                                            next0);
2849           continue;
2850
2851         check_ip6_nd:
2852           /* IP6 ND event notification or solicitation handling to generate
2853              local response instead of flooding */
2854           iph0 = (ip6_header_t *) l3h0;
2855           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2856                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2857                              !ip6_address_is_unspecified
2858                              (&iph0->src_address)))
2859             {
2860               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2861               if (vnet_ip6_nd_term
2862                   (vm, node, p0, eth0, iph0, sw_if_index0,
2863                    vnet_buffer (p0)->l2.bd_index))
2864                 goto output_response;
2865             }
2866
2867         next_l2_feature:
2868           {
2869             next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
2870                                           L2INPUT_FEAT_ARP_TERM);
2871             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2872                                              to_next, n_left_to_next,
2873                                              pi0, next0);
2874             continue;
2875           }
2876
2877         drop:
2878           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2879               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2880                arp0->ip4_over_ethernet[1].ip4.as_u32))
2881             {
2882               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2883             }
2884           next0 = ARP_TERM_NEXT_DROP;
2885           p0->error = node->errors[error0];
2886
2887           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2888                                            to_next, n_left_to_next, pi0,
2889                                            next0);
2890         }
2891
2892       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2893     }
2894
2895   vlib_error_count (vm, node->node_index,
2896                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2897   return frame->n_vectors;
2898 }
2899
2900 /* *INDENT-OFF* */
2901 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2902   .function = arp_term_l2bd,
2903   .name = "arp-term-l2bd",
2904   .vector_size = sizeof (u32),
2905   .n_errors = ETHERNET_ARP_N_ERROR,
2906   .error_strings = ethernet_arp_error_strings,
2907   .n_next_nodes = ARP_TERM_N_NEXT,
2908   .next_nodes = {
2909     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2910     [ARP_TERM_NEXT_DROP] = "error-drop",
2911   },
2912   .format_buffer = format_ethernet_arp_header,
2913   .format_trace = format_arp_term_input_trace,
2914 };
2915 /* *INDENT-ON* */
2916
2917 clib_error_t *
2918 arp_term_init (vlib_main_t * vm)
2919 {
2920   // Initialize the feature next-node indexes
2921   feat_bitmap_init_next_nodes (vm,
2922                                arp_term_l2bd_node.index,
2923                                L2INPUT_N_FEAT,
2924                                l2input_get_feat_names (),
2925                                arp_term_next_node_index);
2926   return 0;
2927 }
2928
2929 VLIB_INIT_FUNCTION (arp_term_init);
2930
2931 void
2932 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2933 {
2934   if (e->sw_if_index == sw_if_index)
2935     {
2936       adj_nbr_walk_nh4 (e->sw_if_index,
2937                         &e->ip4_address, arp_mk_complete_walk, e);
2938     }
2939 }
2940
2941 void
2942 ethernet_arp_change_mac (u32 sw_if_index)
2943 {
2944   ethernet_arp_main_t *am = &ethernet_arp_main;
2945   ethernet_arp_ip4_entry_t *e;
2946   adj_index_t ai;
2947
2948   /* *INDENT-OFF* */
2949   pool_foreach (e, am->ip4_entry_pool,
2950   ({
2951     change_arp_mac (sw_if_index, e);
2952   }));
2953   /* *INDENT-ON* */
2954
2955   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
2956
2957   if (ADJ_INDEX_INVALID != ai)
2958     adj_glean_update_rewrite (ai);
2959 }
2960
2961 void
2962 send_ip4_garp (vlib_main_t * vm, u32 sw_if_index)
2963 {
2964   ip4_main_t *i4m = &ip4_main;
2965   ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
2966
2967   send_ip4_garp_w_addr (vm, ip4_addr, sw_if_index);
2968 }
2969
2970 void
2971 send_ip4_garp_w_addr (vlib_main_t * vm,
2972                       const ip4_address_t * ip4_addr, u32 sw_if_index)
2973 {
2974   ip4_main_t *i4m = &ip4_main;
2975   vnet_main_t *vnm = vnet_get_main ();
2976   u8 *rewrite, rewrite_len;
2977   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2978
2979   if (ip4_addr)
2980     {
2981       clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
2982                     format_ip4_address, ip4_addr, sw_if_index);
2983
2984       /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
2985          where the interface IP/MAC pair is used for both source and request
2986          MAC/IP pairs in the request */
2987       u32 bi = 0;
2988       ethernet_arp_header_t *h = vlib_packet_template_get_packet
2989         (vm, &i4m->ip4_arp_request_packet_template, &bi);
2990
2991       if (!h)
2992         return;
2993
2994       mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2995       mac_address_from_bytes (&h->ip4_over_ethernet[1].mac, hi->hw_address);
2996       h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
2997       h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
2998
2999       /* Setup MAC header with ARP Etype and broadcast DMAC */
3000       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
3001       rewrite =
3002         ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_ARP,
3003                                 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
3004       rewrite_len = vec_len (rewrite);
3005       vlib_buffer_advance (b, -rewrite_len);
3006       ethernet_header_t *e = vlib_buffer_get_current (b);
3007       clib_memcpy_fast (e->dst_address, rewrite, rewrite_len);
3008       vec_free (rewrite);
3009
3010       /* Send GARP packet out the specified interface */
3011       vnet_buffer (b)->sw_if_index[VLIB_RX] =
3012         vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
3013       vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
3014       u32 *to_next = vlib_frame_vector_args (f);
3015       to_next[0] = bi;
3016       f->n_vectors = 1;
3017       vlib_put_frame_to_node (vm, hi->output_node_index, f);
3018     }
3019 }
3020
3021 /*
3022  * Remove any arp entries associated with the specified interface
3023  */
3024 static clib_error_t *
3025 vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
3026 {
3027   ethernet_arp_main_t *am = &ethernet_arp_main;
3028
3029   if (!is_add && sw_if_index != ~0)
3030     {
3031       ethernet_arp_ip4_entry_t *e;
3032       /* *INDENT-OFF* */
3033       pool_foreach (e, am->ip4_entry_pool, ({
3034         if (e->sw_if_index != sw_if_index)
3035           continue;
3036         vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
3037           .sw_if_index = sw_if_index,
3038           .ip4 = e->ip4_address,
3039         };
3040         vnet_arp_unset_ip4_over_ethernet_internal (vnm, &args);
3041       }));
3042       /* *INDENT-ON* */
3043       arp_disable (am, sw_if_index);
3044     }
3045   else if (is_add)
3046     {
3047       vnet_feature_enable_disable ("arp", "arp-disabled",
3048                                    sw_if_index, 1, NULL, 0);
3049     }
3050
3051   return (NULL);
3052 }
3053
3054 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_delete_sw_interface);
3055
3056 /*
3057  * fd.io coding-style-patch-verification: ON
3058  *
3059  * Local Variables:
3060  * eval: (c-set-style "gnu")
3061  * End:
3062  */