ethernet: Fix node ordering on ARP feature ARC
[vpp.git] / src / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip_neighbor.h>
20 #include <vnet/ip/ip6.h>
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/ethernet/arp.h>
23 #include <vnet/l2/l2_input.h>
24 #include <vppinfra/mhash.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/fib/fib_entry_src.h>
27 #include <vnet/adj/adj_nbr.h>
28 #include <vnet/adj/adj_mcast.h>
29 #include <vnet/mpls/mpls.h>
30 #include <vnet/l2/feat_bitmap.h>
31
32 #include <vlibmemory/api.h>
33
34 /**
35  * @file
36  * @brief IPv4 ARP.
37  *
38  * This file contains code to manage the IPv4 ARP tables (IP Address
39  * to MAC Address lookup).
40  */
41
42
/**
 * @brief Per-interface ARP configuration and state
 *
 * Instances live in the ethernet_arp_by_sw_if_index vector of
 * ethernet_arp_main_t and are indexed by sw_if_index.
 */
typedef struct ethernet_arp_interface_t_
{
  /**
   * Hash table of ARP entries.
   * Since this hash table is per-interface, the key is only the IPv4 address.
   * The value is the entry's index in ethernet_arp_main_t.ip4_entry_pool.
   * NULL until the first entry is added on the interface.
   */
  uword *arp_entries;
  /**
   * Is ARP enabled on this interface
   */
  u32 enabled;
  /**
   * Is Proxy ARP enabled on this interface
   */
  u32 proxy_enabled;
} ethernet_arp_interface_t;
62
/**
 * @brief One proxy-ARP configuration entry.
 *
 * Entries are stored in the proxy_arps vector of ethernet_arp_main_t.
 * NOTE(review): the fields look like an IPv4 address range scoped to a FIB;
 * the code that consumes them is outside this section - confirm there
 * whether the bounds are inclusive.
 */
typedef struct
{
  /* Low end of the address range */
  ip4_address_t lo_addr;
  /* High end of the address range */
  ip4_address_t hi_addr;
  /* FIB index associated with the range */
  u32 fib_index;
} ethernet_proxy_arp_t;
69
/**
 * @brief A registered request to be told about an ARP resolution or a MAC
 * change for one IPv4 address.
 *
 * Elements are pool-allocated and chained into singly linked lists via
 * next_index; the list head's pool index is stored in a hash keyed by the
 * IPv4 address.
 */
typedef struct
{
  /* Pool index of the next element in the list, ~0 terminates the list */
  u32 next_index;
  /* Process node to signal with vlib_process_signal_event() */
  uword node_index;
  /* Event type passed to vlib_process_signal_event() */
  uword type_opaque;
  /* Event data passed to vlib_process_signal_event() and to data_callback */
  uword data;
  /* Used for arp event notification only */
  arp_change_event_cb_t data_callback;
  /* Compared, with node_index and type_opaque, to identify a registration */
  u32 pid;
} pending_resolution_t;
80
/**
 * @brief Global (per-process) ARP state.
 */
typedef struct
{
  /* Hash tables mapping name to opcode. */
  uword *opcode_by_name;

  /*
   * Waiters for an address to resolve: hash from IPv4 address to the
   * head index of a list in the pending_resolutions pool.
   */
  uword *pending_resolutions_by_address;
  pending_resolution_t *pending_resolutions;

  /*
   * Mac address change notification: hash from IPv4 address to the
   * head index of a list in the mac_changes pool.
   */
  uword *mac_changes_by_address;
  pending_resolution_t *mac_changes;

  /* Pool of all ARP entries, for every interface */
  ethernet_arp_ip4_entry_t *ip4_entry_pool;

  /* ARP attack mitigation */
  /* Pool index at which the search for an entry to recycle resumes */
  u32 arp_delete_rotor;
  /* Maximum number of pool entries; 0 disables the limit */
  u32 limit_arp_cache_size;

  /** Per interface state */
  ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;

  /* Proxy arp vector */
  ethernet_proxy_arp_t *proxy_arps;

  /* NOTE(review): presumably the process node and event type used to
     publish wildcard ARP events - not used in this section, confirm */
  uword wc_ip4_arp_publisher_node;
  uword wc_ip4_arp_publisher_et;

  /* ARP feature arc index */
  u8 feature_arc_index;
} ethernet_arp_main_t;
112
/* The single instance of the ARP state used throughout this file */
static ethernet_arp_main_t ethernet_arp_main;
114
/**
 * @brief Arguments describing one operation on an ARP entry.
 *
 * Passed to set_ip4_over_ethernet_rpc_callback() and to the
 * vnet_arp_*_ip4_over_ethernet_internal() workers.
 */
typedef struct
{
  /* Interface the entry belongs to */
  u32 sw_if_index;
  /* IPv4 address of the neighbour */
  ip4_address_t ip4;
  /* MAC address of the neighbour */
  mac_address_t mac;
  /* Neighbour flags requested for the entry (static, no-fib-entry, ...) */
  ip_neighbor_flags_t nbr_flags;
  /* Operation selector, a combination of the ETHERNET_ARP_ARGS_* bits.
     NOTE(review): only FLUSH is used in this section (by arp_disable);
     the dispatch on the other bits is elsewhere - confirm their meaning */
  u32 flags;
#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
#define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
#define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
#define ETHERNET_ARP_ARGS_WC_PUB  (1<<3)
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
127
/* NOTE(review): presumably the five leading octets shared by IPv4 VRRP
   virtual-router MAC addresses (00-00-5E-00-01-xx); its use is outside
   this section - confirm */
static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };

/* Node index for send_garp_na_process */
u32 send_garp_na_process_node_index;

/* Forward declaration; the definition is not in this section of the file */
static void
set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                    * a);
136
137 static u8 *
138 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
139 {
140   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
141   char *t = 0;
142   switch (h)
143     {
144 #define _(n,f) case n: t = #f; break;
145       foreach_ethernet_arp_hardware_type;
146 #undef _
147
148     default:
149       return format (s, "unknown 0x%x", h);
150     }
151
152   return format (s, "%s", t);
153 }
154
155 static u8 *
156 format_ethernet_arp_opcode (u8 * s, va_list * va)
157 {
158   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
159   char *t = 0;
160   switch (o)
161     {
162 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
163       foreach_ethernet_arp_opcode;
164 #undef _
165
166     default:
167       return format (s, "unknown 0x%x", o);
168     }
169
170   return format (s, "%s", t);
171 }
172
173 static uword
174 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
175                                               va_list * args)
176 {
177   int *result = va_arg (*args, int *);
178   ethernet_arp_main_t *am = &ethernet_arp_main;
179   int x, i;
180
181   /* Numeric opcode. */
182   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
183     {
184       if (x >= (1 << 16))
185         return 0;
186       *result = x;
187       return 1;
188     }
189
190   /* Named type. */
191   if (unformat_user (input, unformat_vlib_number_by_name,
192                      am->opcode_by_name, &i))
193     {
194       *result = i;
195       return 1;
196     }
197
198   return 0;
199 }
200
201 static uword
202 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
203                                              va_list * args)
204 {
205   int *result = va_arg (*args, int *);
206   if (!unformat_user
207       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
208     return 0;
209
210   *result = clib_host_to_net_u16 ((u16) * result);
211   return 1;
212 }
213
214 static u8 *
215 format_ethernet_arp_header (u8 * s, va_list * va)
216 {
217   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
218   u32 max_header_bytes = va_arg (*va, u32);
219   u32 indent;
220   u16 l2_type, l3_type;
221
222   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
223     return format (s, "ARP header truncated");
224
225   l2_type = clib_net_to_host_u16 (a->l2_type);
226   l3_type = clib_net_to_host_u16 (a->l3_type);
227
228   indent = format_get_indent (s);
229
230   s = format (s, "%U, type %U/%U, address size %d/%d",
231               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
232               format_ethernet_arp_hardware_type, l2_type,
233               format_ethernet_type, l3_type,
234               a->n_l2_address_bytes, a->n_l3_address_bytes);
235
236   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
237       && l3_type == ETHERNET_TYPE_IP4)
238     {
239       s = format (s, "\n%U%U/%U -> %U/%U",
240                   format_white_space, indent,
241                   format_mac_address_t, &a->ip4_over_ethernet[0].mac,
242                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
243                   format_mac_address_t, &a->ip4_over_ethernet[1].mac,
244                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
245     }
246   else
247     {
248       uword n2 = a->n_l2_address_bytes;
249       uword n3 = a->n_l3_address_bytes;
250       s = format (s, "\n%U%U/%U -> %U/%U",
251                   format_white_space, indent,
252                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
253                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
254                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
255                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
256     }
257
258   return s;
259 }
260
261 u8 *
262 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
263 {
264   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
265   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
266   vnet_sw_interface_t *si;
267
268   if (!e)
269     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
270                    "Flags", "Ethernet", "Interface");
271
272   si = vnet_get_sw_interface (vnm, e->sw_if_index);
273
274   return format (s, "%=12U%=16U%=6U%=20U%U",
275                  format_vlib_time, vnm->vlib_main, e->time_last_updated,
276                  format_ip4_address, &e->ip4_address,
277                  format_ip_neighbor_flags, e->flags,
278                  format_mac_address_t, &e->mac,
279                  format_vnet_sw_interface_name, vnm, si);
280 }
281
/**
 * @brief Packet trace record: a copy of the first 64 bytes of the packet,
 * as consumed by the trace formatters below.
 */
typedef struct
{
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
286
287 static u8 *
288 format_ethernet_arp_input_trace (u8 * s, va_list * va)
289 {
290   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
291   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
292   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
293
294   s = format (s, "%U",
295               format_ethernet_arp_header,
296               t->packet_data, sizeof (t->packet_data));
297
298   return s;
299 }
300
301 static u8 *
302 format_arp_term_input_trace (u8 * s, va_list * va)
303 {
304   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
305   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
306   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
307
308   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
309      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
310      - for ip6, the first nibble has value of 6. */
311   s = format (s, "%U", t->packet_data[0] == 0 ?
312               format_ethernet_arp_header : format_ip6_header,
313               t->packet_data, sizeof (t->packet_data));
314
315   return s;
316 }
317
318 static void
319 arp_nbr_probe (ip_adjacency_t * adj)
320 {
321   vnet_main_t *vnm = vnet_get_main ();
322   ip4_main_t *im = &ip4_main;
323   ip_interface_address_t *ia;
324   ethernet_arp_header_t *h;
325   vnet_hw_interface_t *hi;
326   vnet_sw_interface_t *si;
327   ip4_address_t *src;
328   vlib_buffer_t *b;
329   vlib_main_t *vm;
330   u32 bi = 0;
331
332   vm = vlib_get_main ();
333
334   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
335
336   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
337     {
338       return;
339     }
340
341   src =
342     ip4_interface_address_matching_destination (im,
343                                                 &adj->sub_type.nbr.next_hop.
344                                                 ip4,
345                                                 adj->rewrite_header.
346                                                 sw_if_index, &ia);
347   if (!src)
348     {
349       return;
350     }
351
352   h =
353     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
354                                      &bi);
355   if (!h)
356     return;
357
358   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
359
360   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
361
362   h->ip4_over_ethernet[0].ip4 = src[0];
363   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
364
365   b = vlib_get_buffer (vm, bi);
366   vnet_buffer (b)->sw_if_index[VLIB_RX] =
367     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
368
369   /* Add encapsulation string for software interface (e.g. ethernet header). */
370   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
371   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
372
373   {
374     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
375     u32 *to_next = vlib_frame_vector_args (f);
376     to_next[0] = bi;
377     f->n_vectors = 1;
378     vlib_put_frame_to_node (vm, hi->output_node_index, f);
379   }
380 }
381
382 static void
383 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
384 {
385   adj_nbr_update_rewrite
386     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
387      ethernet_build_rewrite (vnet_get_main (),
388                              e->sw_if_index,
389                              adj_get_link_type (ai), &e->mac));
390 }
391
392 static void
393 arp_mk_incomplete (adj_index_t ai)
394 {
395   ip_adjacency_t *adj = adj_get (ai);
396
397   adj_nbr_update_rewrite
398     (ai,
399      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
400      ethernet_build_rewrite (vnet_get_main (),
401                              adj->rewrite_header.sw_if_index,
402                              VNET_LINK_ARP,
403                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
404 }
405
406 static ethernet_arp_ip4_entry_t *
407 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
408 {
409   ethernet_arp_main_t *am = &ethernet_arp_main;
410   ethernet_arp_ip4_entry_t *e = NULL;
411   uword *p;
412
413   if (NULL != eai->arp_entries)
414     {
415       p = hash_get (eai->arp_entries, addr->as_u32);
416       if (!p)
417         return (NULL);
418
419       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
420     }
421
422   return (e);
423 }
424
425 static adj_walk_rc_t
426 arp_mk_complete_walk (adj_index_t ai, void *ctx)
427 {
428   ethernet_arp_ip4_entry_t *e = ctx;
429
430   arp_mk_complete (ai, e);
431
432   return (ADJ_WALK_RC_CONTINUE);
433 }
434
435 static adj_walk_rc_t
436 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
437 {
438   arp_mk_incomplete (ai);
439
440   return (ADJ_WALK_RC_CONTINUE);
441 }
442
443 static int
444 arp_is_enabled (ethernet_arp_main_t * am, u32 sw_if_index)
445 {
446   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
447     return 0;
448
449   return (am->ethernet_arp_by_sw_if_index[sw_if_index].enabled);
450 }
451
452 static void
453 arp_enable (ethernet_arp_main_t * am, u32 sw_if_index)
454 {
455   if (arp_is_enabled (am, sw_if_index))
456     return;
457
458   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
459
460   am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 1;
461
462   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 1, NULL, 0);
463   vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 0, NULL,
464                                0);
465 }
466
/* Forward declaration, needed by arp_disable(); the definition is not in
   this section of the file */
static int
vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                           vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                           * args);
471
/**
 * @brief Disable ARP on an interface; does nothing when already disabled.
 *
 * Switches the "arp" feature arc from the "arp-reply" node to the
 * "arp-disabled" node, flushes every ARP entry of the interface and then
 * clears the interface's enabled flag.
 *
 * @param am          ARP main state
 * @param sw_if_index interface to disable
 */
static void
arp_disable (ethernet_arp_main_t * am, u32 sw_if_index)
{
  ethernet_arp_interface_t *eai;
  ethernet_arp_ip4_entry_t *e;
  u32 i, *to_delete = 0;
  hash_pair_t *pair;

  if (!arp_is_enabled (am, sw_if_index))
    return;

  vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 1, NULL,
                               0);
  vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 0, NULL, 0);

  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];


  /* Collect the pool indices first; the flush is done in a second pass,
     outside the hash iteration */
  /* *INDENT-OFF* */
  hash_foreach_pair (pair, eai->arp_entries,
  ({
    e = pool_elt_at_index(am->ip4_entry_pool,
                          pair->value[0]);
    vec_add1 (to_delete, e - am->ip4_entry_pool);
  }));
  /* *INDENT-ON* */

  for (i = 0; i < vec_len (to_delete); i++)
    {
      e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);

      /* Describe the entry to the flush routine */
      vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
        .ip4.as_u32 = e->ip4_address.as_u32,
        .sw_if_index = e->sw_if_index,
        .flags = ETHERNET_ARP_ARGS_FLUSH,
      };
      mac_address_copy (&delme.mac, &e->mac);

      vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (), &delme);
    }

  vec_free (to_delete);

  eai->enabled = 0;
}
517
/**
 * @brief Install the rewrite for an IPv4 adjacency on an ethernet interface.
 *
 * ARP is enabled on the interface as a side effect. What is installed
 * depends on the adjacency's lookup_next_index:
 *  - GLEAN: the glean rewrite.
 *  - ARP:   a complete rewrite when an ARP entry for the next-hop exists,
 *           otherwise an incomplete (broadcast ARP) rewrite followed by a
 *           single ARP probe.
 *  - BCAST: a complete rewrite to the broadcast address.
 *  - MCAST: a partial rewrite completed per packet from the IP destination.
 * Any other type is a programming error (ASSERT).
 *
 * @param vnm         vnet main
 * @param sw_if_index interface the adjacency is on
 * @param ai          index of the adjacency to update
 */
void
arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
{
  ethernet_arp_main_t *am = &ethernet_arp_main;
  ethernet_arp_interface_t *arp_int;
  ethernet_arp_ip4_entry_t *e;
  ip_adjacency_t *adj;

  adj = adj_get (ai);

  /* Enabling first also guarantees the per-interface state exists */
  arp_enable (am, sw_if_index);
  arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
  e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);

  switch (adj->lookup_next_index)
    {
    case IP_LOOKUP_NEXT_GLEAN:
      adj_glean_update_rewrite (ai);
      break;
    case IP_LOOKUP_NEXT_ARP:
      if (NULL != e)
        {
          /* Complete every adjacency that shares this next-hop */
          adj_nbr_walk_nh4 (sw_if_index,
                            &e->ip4_address, arp_mk_complete_walk, e);
        }
      else
        {
          /*
           * no matching ARP entry.
           * construct the rewrite required for an ARP packet, and install
           * that on the adjacency.
           */
          adj_nbr_update_rewrite
            (ai,
             ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
             ethernet_build_rewrite
             (vnm,
              sw_if_index,
              VNET_LINK_ARP,
              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));

          /*
           * since the FIB has added this adj for a route, it makes sense it
           * may want to forward traffic sometime soon. Let's send a
           * speculative ARP. Just one. Doing so periodically would not be
           * bad either, but that is more code for relatively little reward.
           */
          arp_nbr_probe (adj);
        }
      break;
    case IP_LOOKUP_NEXT_BCAST:
      adj_nbr_update_rewrite (ai,
                              ADJ_NBR_REWRITE_FLAG_COMPLETE,
                              ethernet_build_rewrite
                              (vnm,
                               sw_if_index,
                               VNET_LINK_IP4,
                               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
      break;
    case IP_LOOKUP_NEXT_MCAST:
      {
        /*
         * Construct a partial rewrite from the known ethernet mcast dest MAC
         */
        u8 *rewrite;
        u8 offset;

        rewrite = ethernet_build_rewrite (vnm,
                                          sw_if_index,
                                          adj->ia_link,
                                          ethernet_ip4_mcast_dst_addr ());
        offset = vec_len (rewrite) - 2;

        /*
         * Complete the remaining fields of the adj's rewrite to direct the
         * completion of the rewrite at switch time by copying in the IP
         * dst address's bytes.
         * Offset is 2 bytes into the MAC destination address.
         */
        adj_mcast_update_rewrite (ai, rewrite, offset);

        break;
      }
    /* None of the remaining adjacency types is expected here */
    case IP_LOOKUP_NEXT_DROP:
    case IP_LOOKUP_NEXT_PUNT:
    case IP_LOOKUP_NEXT_LOCAL:
    case IP_LOOKUP_NEXT_REWRITE:
    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
    case IP_LOOKUP_NEXT_MIDCHAIN:
    case IP_LOOKUP_NEXT_ICMP_ERROR:
    case IP_LOOKUP_N_NEXT:
      ASSERT (0);
      break;
    }
}
614
615 static void
616 arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
617 {
618   fib_prefix_t pfx = {
619     .fp_len = 32,
620     .fp_proto = FIB_PROTOCOL_IP4,
621     .fp_addr.ip4 = e->ip4_address,
622   };
623
624   e->fib_entry_index =
625     fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
626                               FIB_ENTRY_FLAG_ATTACHED,
627                               DPO_PROTO_IP4, &pfx.fp_addr,
628                               e->sw_if_index, ~0, 1, NULL,
629                               FIB_ROUTE_PATH_FLAG_NONE);
630   fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
631 }
632
633 static void
634 arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
635 {
636   if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
637     {
638       fib_prefix_t pfx = {
639         .fp_len = 32,
640         .fp_proto = FIB_PROTOCOL_IP4,
641         .fp_addr.ip4 = e->ip4_address,
642       };
643       u32 fib_index;
644
645       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
646
647       fib_table_entry_path_remove (fib_index, &pfx,
648                                    FIB_SOURCE_ADJ,
649                                    DPO_PROTO_IP4,
650                                    &pfx.fp_addr,
651                                    e->sw_if_index, ~0, 1,
652                                    FIB_ROUTE_PATH_FLAG_NONE);
653       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
654     }
655 }
656
/**
 * @brief Pick an existing non-static ARP entry to be recycled for a new
 * neighbour (used when the cache size limit has been reached).
 *
 * The search starts after the entry chosen last time (arp_delete_rotor)
 * and inspects at most 100 entries. The chosen entry is removed from its
 * interface's hash, its adj-fib is removed and the adjacencies using it
 * are made incomplete. The entry stays allocated in the pool; its fields
 * are not reset here.
 *
 * @return the entry to reuse, or NULL when none was found
 */
static ethernet_arp_ip4_entry_t *
force_reuse_arp_entry (void)
{
  ethernet_arp_ip4_entry_t *e;
  ethernet_arp_main_t *am = &ethernet_arp_main;
  u32 count = 0;
  u32 index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
  if (index == ~0)              /* Try again from elt 0 */
    index = pool_next_index (am->ip4_entry_pool, index);

  /* Find a non-static random entry to free up for reuse */
  do
    {
      if ((count++ == 100) || (index == ~0))
        return NULL;            /* give up after 100 entries */
      e = pool_elt_at_index (am->ip4_entry_pool, index);
      /* Remember where we got to, so the next search resumes from here */
      am->arp_delete_rotor = index;
      index = pool_next_index (am->ip4_entry_pool, index);
    }
  while (e->flags & IP_NEIGHBOR_FLAG_STATIC);

  /* Remove ARP entry from its interface and update fib */
  hash_unset
    (am->ethernet_arp_by_sw_if_index[e->sw_if_index].arp_entries,
     e->ip4_address.as_u32);
  arp_adj_fib_remove
    (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
  adj_nbr_walk_nh4 (e->sw_if_index,
                    &e->ip4_address, arp_mk_incomplete_walk, e);
  return e;
}
688
689 static int
690 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
691                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
692                                          * args)
693 {
694   ethernet_arp_ip4_entry_t *e = 0;
695   ethernet_arp_main_t *am = &ethernet_arp_main;
696   vlib_main_t *vm = vlib_get_main ();
697   int make_new_arp_cache_entry = 1;
698   uword *p;
699   pending_resolution_t *pr, *mc;
700   ethernet_arp_interface_t *arp_int;
701   u32 sw_if_index = args->sw_if_index;
702
703   arp_enable (am, sw_if_index);
704
705   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
706
707   if (NULL != arp_int->arp_entries)
708     {
709       p = hash_get (arp_int->arp_entries, args->ip4.as_u32);
710       if (p)
711         {
712           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
713
714           /* Refuse to over-write static arp. */
715           if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC) &&
716               (e->flags & IP_NEIGHBOR_FLAG_STATIC))
717             {
718               /* if MAC address match, still check to send event */
719               if (mac_address_equal (&e->mac, &args->mac))
720                 goto check_customers;
721               return -2;
722             }
723           make_new_arp_cache_entry = 0;
724         }
725     }
726
727   if (make_new_arp_cache_entry)
728     {
729       if (am->limit_arp_cache_size &&
730           pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
731         {
732           e = force_reuse_arp_entry ();
733           if (NULL == e)
734             return -2;
735         }
736       else
737         pool_get (am->ip4_entry_pool, e);
738
739       if (NULL == arp_int->arp_entries)
740         arp_int->arp_entries = hash_create (0, sizeof (u32));
741
742       hash_set (arp_int->arp_entries, args->ip4.as_u32,
743                 e - am->ip4_entry_pool);
744
745       e->sw_if_index = sw_if_index;
746       e->ip4_address = args->ip4;
747       e->fib_entry_index = FIB_NODE_INDEX_INVALID;
748       mac_address_copy (&e->mac, &args->mac);
749
750       if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
751         {
752           arp_adj_fib_add (e,
753                            ip4_fib_table_get_index_for_sw_if_index
754                            (e->sw_if_index));
755         }
756       else
757         {
758           e->flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
759         }
760     }
761   else
762     {
763       /*
764        * prevent a DoS attack from the data-plane that
765        * spams us with no-op updates to the MAC address
766        */
767       if (mac_address_equal (&e->mac, &args->mac))
768         {
769           e->time_last_updated = vlib_time_now (vm);
770           goto check_customers;
771         }
772
773       /* Update ethernet address. */
774       mac_address_copy (&e->mac, &args->mac);
775     }
776
777   /* Update time stamp and flags. */
778   e->time_last_updated = vlib_time_now (vm);
779   if (args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC)
780     {
781       e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
782       e->flags |= IP_NEIGHBOR_FLAG_STATIC;
783     }
784   else
785     {
786       e->flags &= ~IP_NEIGHBOR_FLAG_STATIC;
787       e->flags |= IP_NEIGHBOR_FLAG_DYNAMIC;
788     }
789
790   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
791
792 check_customers:
793   /* Customer(s) waiting for this address to be resolved? */
794   p = hash_get (am->pending_resolutions_by_address, args->ip4.as_u32);
795   if (p)
796     {
797       u32 next_index;
798       next_index = p[0];
799
800       while (next_index != (u32) ~ 0)
801         {
802           pr = pool_elt_at_index (am->pending_resolutions, next_index);
803           vlib_process_signal_event (vm, pr->node_index,
804                                      pr->type_opaque, pr->data);
805           next_index = pr->next_index;
806           pool_put (am->pending_resolutions, pr);
807         }
808
809       hash_unset (am->pending_resolutions_by_address, args->ip4.as_u32);
810     }
811
812   /* Customer(s) requesting ARP event for this address? */
813   p = hash_get (am->mac_changes_by_address, args->ip4.as_u32);
814   if (p)
815     {
816       u32 next_index;
817       next_index = p[0];
818
819       while (next_index != (u32) ~ 0)
820         {
821           int rv = 1;
822           mc = pool_elt_at_index (am->mac_changes, next_index);
823
824           /* Call the user's data callback, return 1 to suppress dup events */
825           if (mc->data_callback)
826             rv = (mc->data_callback) (mc->data, &args->mac, sw_if_index, 0);
827
828           /*
829            * Signal the resolver process, as long as the user
830            * says they want to be notified
831            */
832           if (rv == 0)
833             vlib_process_signal_event (vm, mc->node_index,
834                                        mc->type_opaque, mc->data);
835           next_index = mc->next_index;
836         }
837     }
838
839   return 0;
840 }
841
842 void
843 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
844                                         void *address_arg,
845                                         uword node_index,
846                                         uword type_opaque, uword data)
847 {
848   ethernet_arp_main_t *am = &ethernet_arp_main;
849   ip4_address_t *address = address_arg;
850   uword *p;
851   pending_resolution_t *pr;
852
853   pool_get (am->pending_resolutions, pr);
854
855   pr->next_index = ~0;
856   pr->node_index = node_index;
857   pr->type_opaque = type_opaque;
858   pr->data = data;
859   pr->data_callback = 0;
860
861   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
862   if (p)
863     {
864       /* Insert new resolution at the head of the list */
865       pr->next_index = p[0];
866       hash_unset (am->pending_resolutions_by_address, address->as_u32);
867     }
868
869   hash_set (am->pending_resolutions_by_address, address->as_u32,
870             pr - am->pending_resolutions);
871 }
872
873 int
874 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
875                                    arp_change_event_cb_t data_callback,
876                                    u32 pid,
877                                    void *address_arg,
878                                    uword node_index,
879                                    uword type_opaque, uword data, int is_add)
880 {
881   ethernet_arp_main_t *am = &ethernet_arp_main;
882   ip4_address_t *address = address_arg;
883
884   /* Try to find an existing entry */
885   u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32);
886   u32 *p = first;
887   pending_resolution_t *mc;
888   while (p && *p != ~0)
889     {
890       mc = pool_elt_at_index (am->mac_changes, *p);
891       if (mc->node_index == node_index && mc->type_opaque == type_opaque
892           && mc->pid == pid)
893         break;
894       p = &mc->next_index;
895     }
896
897   int found = p && *p != ~0;
898   if (is_add)
899     {
900       if (found)
901         return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
902
903       pool_get (am->mac_changes, mc);
904       /* *INDENT-OFF* */
905       *mc = (pending_resolution_t)
906       {
907         .next_index = ~0,
908         .node_index = node_index,
909         .type_opaque = type_opaque,
910         .data = data,
911         .data_callback = data_callback,
912         .pid = pid,
913       };
914       /* *INDENT-ON* */
915
916       /* Insert new resolution at the end of the list */
917       u32 new_idx = mc - am->mac_changes;
918       if (p)
919         p[0] = new_idx;
920       else
921         hash_set (am->mac_changes_by_address, address->as_u32, new_idx);
922     }
923   else
924     {
925       if (!found)
926         return VNET_API_ERROR_NO_SUCH_ENTRY;
927
928       /* Clients may need to clean up pool entries, too */
929       if (data_callback)
930         /* no new mac addrs */
931         (data_callback) (mc->data, NULL, ~0, NULL);
932
933       /* Remove the entry from the list and delete the entry */
934       *p = mc->next_index;
935       pool_put (am->mac_changes, mc);
936
937       /* Remove from hash if we deleted the last entry */
938       if (*p == ~0 && p == first)
939         hash_unset (am->mac_changes_by_address, address->as_u32);
940     }
941   return 0;
942 }
943
/* Next nodes of the ARP reply node:
   either we drop the packet or we send a reply to the sender. */
typedef enum
{
  ARP_REPLY_NEXT_DROP,
  ARP_REPLY_NEXT_REPLY_TX,
  /* Number of next nodes; not a valid next index */
  ARP_REPLY_N_NEXT,
} arp_reply_next_t;
951
/* X-macro listing every ARP error/counter as _ (symbol, description).
   It is expanded with different definitions of _ to generate, among other
   things, the ETHERNET_ARP_ERROR_* enum values. */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_dst_address_unset, "IP4 destination address is unset")          \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table") \
  _ (interface_not_ip_enabled, "Interface is not IP enabled") \
  _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \
969
970 typedef enum
971 {
972 #define _(sym,string) ETHERNET_ARP_ERROR_##sym,
973   foreach_ethernet_arp_error
974 #undef _
975     ETHERNET_ARP_N_ERROR,
976 } ethernet_arp_reply_error_t;
977
978 static int
979 arp_unnumbered (vlib_buffer_t * p0,
980                 u32 input_sw_if_index, u32 conn_sw_if_index)
981 {
982   vnet_main_t *vnm = vnet_get_main ();
983   vnet_interface_main_t *vim = &vnm->interface_main;
984   vnet_sw_interface_t *si;
985
986   /* verify that the input interface is unnumbered to the connected.
987    * the connected interface is the interface on which the subnet is
988    * configured */
989   si = &vim->sw_interfaces[input_sw_if_index];
990
991   if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
992         (si->unnumbered_sw_if_index == conn_sw_if_index)))
993     {
994       /* the input interface is not unnumbered to the interface on which
995        * the sub-net is configured that covers the ARP request.
996        * So this is not the case for unnumbered.. */
997       return 0;
998     }
999
1000   return !0;
1001 }
1002
1003 static u32
1004 arp_learn (vnet_main_t * vnm,
1005            ethernet_arp_main_t * am, u32 sw_if_index,
1006            const ethernet_arp_ip4_over_ethernet_address_t * addr)
1007 {
1008   vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0);
1009   return (ETHERNET_ARP_ERROR_l3_src_address_learned);
1010 }
1011
/* Next-node indices of the arp-input node. */
typedef enum arp_input_next_t_
{
  ARP_INPUT_NEXT_DROP = 0,      /* packet failed the header checks */
  ARP_INPUT_NEXT_DISABLED,      /* initial next for packets handed to the arc */
  ARP_INPUT_N_NEXT,             /* number of next nodes; keep last */
} arp_input_next_t;
1018
1019 static uword
1020 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1021 {
1022   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
1023   ethernet_arp_main_t *am = &ethernet_arp_main;
1024
1025   from = vlib_frame_vector_args (frame);
1026   n_left_from = frame->n_vectors;
1027   next_index = node->cached_next_index;
1028
1029   if (node->flags & VLIB_NODE_FLAG_TRACE)
1030     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1031                                    /* stride */ 1,
1032                                    sizeof (ethernet_arp_input_trace_t));
1033
1034   while (n_left_from > 0)
1035     {
1036       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1037
1038       while (n_left_from > 0 && n_left_to_next > 0)
1039         {
1040           const ethernet_arp_header_t *arp0;
1041           arp_input_next_t next0;
1042           vlib_buffer_t *p0;
1043           u32 pi0, error0;
1044
1045           pi0 = to_next[0] = from[0];
1046           from += 1;
1047           to_next += 1;
1048           n_left_from -= 1;
1049           n_left_to_next -= 1;
1050
1051           p0 = vlib_get_buffer (vm, pi0);
1052           arp0 = vlib_buffer_get_current (p0);
1053
1054           error0 = ETHERNET_ARP_ERROR_replies_sent;
1055           next0 = ARP_INPUT_NEXT_DROP;
1056
1057           error0 =
1058             (arp0->l2_type !=
1059              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1060              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1061           error0 =
1062             (arp0->l3_type !=
1063              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1064              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1065           error0 =
1066             (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
1067              ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
1068
1069           if (ETHERNET_ARP_ERROR_replies_sent == error0)
1070             {
1071               next0 = ARP_INPUT_NEXT_DISABLED;
1072               vnet_feature_arc_start (am->feature_arc_index,
1073                                       vnet_buffer (p0)->sw_if_index[VLIB_RX],
1074                                       &next0, p0);
1075             }
1076           else
1077             p0->error = node->errors[error0];
1078
1079           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1080                                            n_left_to_next, pi0, next0);
1081         }
1082
1083       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1084     }
1085
1086   return frame->n_vectors;
1087 }
1088
/* Next-node indices of the arp-disabled node: it can only drop. */
typedef enum arp_disabled_next_t_
{
  ARP_DISABLED_NEXT_DROP = 0,   /* the only disposition */
  ARP_DISABLED_N_NEXT,          /* number of next nodes; keep last */
} arp_disabled_next_t;
1094
/*
 * X-macro table of the arp-disabled node's error codes as
 * (symbol, description) pairs. The last entry has no trailing line
 * continuation, so the macro does not depend on a following blank line.
 */
#define foreach_arp_disabled_error                                      \
  _ (DISABLED, "ARP Disabled on this interface")

/* One enumerator per table entry, followed by the total count. */
typedef enum
{
#define _(sym,string) ARP_DISABLED_ERROR_##sym,
  foreach_arp_disabled_error
#undef _
    ARP_DISABLED_N_ERROR,
} arp_disabled_error_t;

/* Description strings, indexed by arp_disabled_error_t. */
static char *arp_disabled_error_strings[] = {
#define _(sym,string) string,
  foreach_arp_disabled_error
#undef _
};
1111
1112 static uword
1113 arp_disabled (vlib_main_t * vm,
1114               vlib_node_runtime_t * node, vlib_frame_t * frame)
1115 {
1116   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
1117
1118   from = vlib_frame_vector_args (frame);
1119   n_left_from = frame->n_vectors;
1120   next_index = node->cached_next_index;
1121
1122   if (node->flags & VLIB_NODE_FLAG_TRACE)
1123     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1124                                    /* stride */ 1,
1125                                    sizeof (ethernet_arp_input_trace_t));
1126
1127   while (n_left_from > 0)
1128     {
1129       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1130
1131       while (n_left_from > 0 && n_left_to_next > 0)
1132         {
1133           arp_disabled_next_t next0 = ARP_DISABLED_NEXT_DROP;
1134           vlib_buffer_t *p0;
1135           u32 pi0, error0;
1136
1137           next0 = ARP_DISABLED_NEXT_DROP;
1138           error0 = ARP_DISABLED_ERROR_DISABLED;
1139
1140           pi0 = to_next[0] = from[0];
1141           from += 1;
1142           to_next += 1;
1143           n_left_from -= 1;
1144           n_left_to_next -= 1;
1145
1146           p0 = vlib_get_buffer (vm, pi0);
1147           p0->error = node->errors[error0];
1148
1149           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1150                                            n_left_to_next, pi0, next0);
1151         }
1152
1153       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1154     }
1155
1156   return frame->n_vectors;
1157 }
1158
1159 static_always_inline u32
1160 arp_mk_reply (vnet_main_t * vnm,
1161               vlib_buffer_t * p0,
1162               u32 sw_if_index0,
1163               const ip4_address_t * if_addr0,
1164               ethernet_arp_header_t * arp0, ethernet_header_t * eth_rx)
1165 {
1166   vnet_hw_interface_t *hw_if0;
1167   u8 *rewrite0, rewrite0_len;
1168   ethernet_header_t *eth_tx;
1169   u32 next0;
1170
1171   /* Send a reply.
1172      An adjacency to the sender is not always present,
1173      so we use the interface to build us a rewrite string
1174      which will contain all the necessary tags. */
1175   rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
1176                                      VNET_LINK_ARP, eth_rx->src_address);
1177   rewrite0_len = vec_len (rewrite0);
1178
1179   /* Figure out how much to rewind current data from adjacency. */
1180   vlib_buffer_advance (p0, -rewrite0_len);
1181   eth_tx = vlib_buffer_get_current (p0);
1182
1183   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1184   hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1185
1186   /* Send reply back through input interface */
1187   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1188   next0 = ARP_REPLY_NEXT_REPLY_TX;
1189
1190   arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1191
1192   arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1193
1194   mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac,
1195                           hw_if0->hw_address);
1196   clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1197     if_addr0->data_u32;
1198
1199   /* Hardware must be ethernet-like. */
1200   ASSERT (vec_len (hw_if0->hw_address) == 6);
1201
1202   /* the rx nd tx ethernet headers wil overlap in the case
1203    * when we received a tagged VLAN=0 packet, but we are sending
1204    * back untagged */
1205   clib_memcpy_fast (eth_tx, rewrite0, vec_len (rewrite0));
1206   vec_free (rewrite0);
1207
1208   return (next0);
1209 }
1210
/* Classification of the FIB entry that matches an ARP target address. */
enum arp_dst_fib_type
{
  ARP_DST_FIB_NONE = 0,         /* neither of the cases below */
  ARP_DST_FIB_ADJ,              /* entry is sourced by FIB_SOURCE_ADJ */
  ARP_DST_FIB_CONN              /* a source carries the CONNECTED flag */
};
1217
1218 /*
1219  * we're looking for FIB sources that indicate the destination
1220  * is attached. There may be interposed DPO prior to the one
1221  * we are looking for
1222  */
1223 static enum arp_dst_fib_type
1224 arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
1225 {
1226   const fib_entry_t *entry = fib_entry_get (fei);
1227   const fib_entry_src_t *entry_src;
1228   fib_source_t src;
1229   /* *INDENT-OFF* */
1230   FOR_EACH_SRC_ADDED(entry, entry_src, src,
1231   ({
1232     *flags = fib_entry_get_flags_for_source (fei, src);
1233     if (fib_entry_is_sourced (fei, FIB_SOURCE_ADJ))
1234         return ARP_DST_FIB_ADJ;
1235       else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
1236         return ARP_DST_FIB_CONN;
1237   }))
1238   /* *INDENT-ON* */
1239
1240   return ARP_DST_FIB_NONE;
1241 }
1242
/**
 * @brief arp-reply node function: answer ARP requests for local addresses
 *        and learn from ARP replies.
 *
 * Per packet, in order:
 *  - drop if the RX interface has no IP4 FIB table;
 *  - look up the sender address with progressively shorter masks until a
 *    source with the ATTACHED or CONNECTED flag is found (drop if none is
 *    found before the default route; pass to the next feature if a source
 *    has the LOCAL flag);
 *  - classify the /32 lookup of the target address (arp_dst_fib_check);
 *  - drop when the ARP-level and L2 source MACs differ (VRRP replies are
 *    exempt);
 *  - for replies: learn the sender when the target is local, then pass
 *    to the next feature;
 *  - for requests to a non-local target: pass to the next feature;
 *  - otherwise verify the interface (or unnumbered) relationship, drop
 *    gratuitous ARPs, build the reply and learn the requester.
 *
 * @return the number of packets in the frame.
 */
static uword
arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  ethernet_arp_main_t *am = &ethernet_arp_main;
  vnet_main_t *vnm = vnet_get_main ();
  u32 n_left_from, next_index, *from, *to_next;
  u32 n_replies_sent = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1,
                                   sizeof (ethernet_arp_input_trace_t));

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          ethernet_arp_header_t *arp0;
          ethernet_header_t *eth_rx;
          const ip4_address_t *if_addr0;
          u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
          u8 dst_is_local0, is_vrrp_reply0;
          fib_node_index_t dst_fei, src_fei;
          const fib_prefix_t *pfx0;
          fib_entry_flag_t src_flags, dst_flags;

          /* speculatively enqueue the buffer to the current next frame */
          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          arp0 = vlib_buffer_get_current (p0);
          /* Get the ethernet header of the received frame. */
          eth_rx = ethernet_buffer_get_header (p0);

          /* defaults: drop, with the zero-valued error code */
          next0 = ARP_REPLY_NEXT_DROP;
          error0 = ETHERNET_ARP_ERROR_replies_sent;
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];

          /* Check that IP address is local and matches incoming interface. */
          fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
          if (~0 == fib_index0)
            {
              error0 = ETHERNET_ARP_ERROR_interface_no_table;
              goto drop;

            }

          {
            /*
             * we're looking for FIB entries that indicate the source
             * is attached. There may be more specific non-attached
             * routes that match the source, but these do not influence
             * whether we respond to an ARP request, i.e. they do not
             * influence whether we are the correct way for the sender
             * to reach us, they only affect how we reach the sender.
             */
            fib_entry_t *src_fib_entry;
            const fib_prefix_t *pfx;
            fib_entry_src_t *src;
            fib_source_t source;
            int attached;
            int mask;

            /* start with a host-route lookup and shorten from there */
            mask = 32;
            attached = 0;

            do
              {
                src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
                                                &arp0->
                                                ip4_over_ethernet[0].ip4,
                                                mask);
                src_fib_entry = fib_entry_get (src_fei);

                /*
                 * It's possible that the source that provides the
                 * flags we need, or the flags we must not have,
                 * is not the best source, so check them all.
                 */
                /* *INDENT-OFF* */
                FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
                ({
                  src_flags = fib_entry_get_flags_for_source (src_fei, source);

                  /* Reject requests/replies with our local interface
                     address. */
                  if (FIB_ENTRY_FLAG_LOCAL & src_flags)
                    {
                      error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
                      /*
                       * When VPP has an interface whose address is also
                       * applied to a TAP interface on the host, then VPP's
                       * TAP interface will be unnumbered  to the 'real'
                       * interface and do proxy ARP from the host.
                       * The curious aspect of this setup is that ARP requests
                       * from the host will come from the VPP's own address.
                       * So don't drop immediately here, instead go see if this
                       * is a proxy ARP case.
                       */
                      goto next_feature;
                    }
                  /* A Source must also be local to subnet of matching
                   * interface address. */
                  if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
                      (FIB_ENTRY_FLAG_CONNECTED & src_flags))
                    {
                      attached = 1;
                      /* leaves the per-source walk, not the do/while */
                      break;
                    }
                  /*
                   * else
                   *  The packet was sent from an address that is not
                   *  connected nor attached i.e. it is not from an
                   *  address that is covered by a link's sub-net,
                   *  nor is it a already learned host resp.
                   */
                }));
                /* *INDENT-ON* */

                /*
                 * shorter mask lookup for the next iteration.
                 */
                pfx = fib_entry_get_prefix (src_fei);
                mask = pfx->fp_len - 1;

                /*
                 * continue until we hit the default route or we find
                 * the attached we are looking for. The most likely
                 * outcome is we find the attached with the first source
                 * on the first lookup.
                 */
              }
            while (!attached &&
                   !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));

            if (!attached)
              {
                /*
                 * The walk reached the default route without finding an
                 * attached or connected source covering the sender, i.e.
                 * the matching routes were added as a result of routing
                 * rather than interface/ARP configuration: drop.
                 */
                error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
                goto drop;
              }
          }

          /* host-route lookup of the ARP target address */
          dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
                                          &arp0->ip4_over_ethernet[1].ip4,
                                          32);
          switch (arp_dst_fib_check (dst_fei, &dst_flags))
            {
            case ARP_DST_FIB_ADJ:
              /*
               * We matched an adj-fib on the source subnet (a /32 previously
               * added as a result of ARP). If this request is a gratuitous
               * ARP, then learn from it.
               * The check for matching an adj-fib is to prevent hosts
               * from spamming us with gratuitous ARPs that might otherwise
               * blow our ARP cache.
               *
               * NOTE(review): when the packet is not gratuitous, error0 is
               * still ETHERNET_ARP_ERROR_replies_sent here, so the drop is
               * recorded against that error - confirm this is intended.
               */
              if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
                  arp0->ip4_over_ethernet[1].ip4.as_u32)
                error0 = arp_learn (vnm, am, sw_if_index0,
                                    &arp0->ip4_over_ethernet[0]);
              goto drop;
            case ARP_DST_FIB_CONN:
              /* destination is connected, continue to process */
              break;
            case ARP_DST_FIB_NONE:
              /* destination is not connected, stop here */
              error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
              goto next_feature;
            }

          /* dst_flags was filled in by arp_dst_fib_check for this case */
          dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
          pfx0 = fib_entry_get_prefix (dst_fei);
          if_addr0 = &pfx0->fp_addr.ip4;

          /* a reply whose ARP-level sender MAC starts with the VRRP prefix */
          is_vrrp_reply0 =
            ((arp0->opcode ==
              clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
             &&
             (!memcmp
              (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix,
               sizeof (vrrp_prefix))));

          /* Trash ARP packets whose ARP-level source addresses do not
             match their L2-frame-level source addresses, unless it's
             a reply from a VRRP virtual router */
          if (!ethernet_mac_address_equal
              (eth_rx->src_address,
               arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
            {
              error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
              goto drop;
            }

          /* Learn or update sender's mapping only for replies to addresses
           * that are local to the subnet */
          if (arp0->opcode ==
              clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
            {
              if (dst_is_local0)
                error0 = arp_learn (vnm, am, sw_if_index0,
                                    &arp0->ip4_over_ethernet[0]);
              else
                /* a reply for a non-local destination could be a GARP.
                 * GARPs for hosts we know were handled above, so this one
                 * we drop */
                error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;

              goto next_feature;
            }
          else if (arp0->opcode ==
                   clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request) &&
                   (dst_is_local0 == 0))
            {
              /* a request for an address that is not ours: not for this
               * node to answer */
              goto next_feature;
            }

          /* Honor unnumbered interface, if any */
          conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
          if (sw_if_index0 != conn_sw_if_index0 ||
              sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
            {
              /*
               * The interface the ARP is sent to or was received on is not the
               * interface on which the covering prefix is configured.
               * Maybe this is a case for unnumbered.
               */
              if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
                {
                  error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
                  goto drop;
                }
            }
          /* sender and target address are equal: probe or announcement */
          if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
              arp0->ip4_over_ethernet[1].ip4.as_u32)
            {
              error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
              goto drop;
            }

          /* rewrites the packet in place; the requester is now entry [1] */
          next0 = arp_mk_reply (vnm, p0, sw_if_index0,
                                if_addr0, arp0, eth_rx);

          /* We are going to reply to this request, so, in the absence of
             errors, learn the sender */
          if (!error0)
            error0 = arp_learn (vnm, am, sw_if_index0,
                                &arp0->ip4_over_ethernet[1]);

          n_replies_sent += 1;
          goto enqueue;

        next_feature:
          /* hand over to the next feature on the arc; error0 is not
           * written to the buffer on this path */
          vnet_feature_next (&next0, p0);
          goto enqueue;

        drop:
          /* next0 is still ARP_REPLY_NEXT_DROP; record the reason */
          p0->error = node->errors[error0];

        enqueue:
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* account for all replies generated from this frame in one go */
  vlib_error_count (vm, node->node_index,
                    ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);

  return frame->n_vectors;
}
1533
1534 static uword
1535 arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1536 {
1537   ethernet_arp_main_t *am = &ethernet_arp_main;
1538   vnet_main_t *vnm = vnet_get_main ();
1539   u32 n_left_from, next_index, *from, *to_next;
1540   u32 n_arp_replies_sent = 0;
1541
1542   from = vlib_frame_vector_args (frame);
1543   n_left_from = frame->n_vectors;
1544   next_index = node->cached_next_index;
1545
1546   if (node->flags & VLIB_NODE_FLAG_TRACE)
1547     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1548                                    /* stride */ 1,
1549                                    sizeof (ethernet_arp_input_trace_t));
1550
1551   while (n_left_from > 0)
1552     {
1553       u32 n_left_to_next;
1554
1555       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1556
1557       while (n_left_from > 0 && n_left_to_next > 0)
1558         {
1559           vlib_buffer_t *p0;
1560           ethernet_arp_header_t *arp0;
1561           ethernet_header_t *eth_rx;
1562           ip4_address_t proxy_src;
1563           u32 pi0, error0, next0, sw_if_index0, fib_index0;
1564           u8 is_request0;
1565           ethernet_proxy_arp_t *pa;
1566
1567           pi0 = from[0];
1568           to_next[0] = pi0;
1569           from += 1;
1570           to_next += 1;
1571           n_left_from -= 1;
1572           n_left_to_next -= 1;
1573
1574           p0 = vlib_get_buffer (vm, pi0);
1575           arp0 = vlib_buffer_get_current (p0);
1576           /* Fill in ethernet header. */
1577           eth_rx = ethernet_buffer_get_header (p0);
1578
1579           is_request0 = arp0->opcode
1580             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
1581
1582           error0 = ETHERNET_ARP_ERROR_replies_sent;
1583           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1584           next0 = ARP_REPLY_NEXT_DROP;
1585
1586           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1587           if (~0 == fib_index0)
1588             {
1589               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1590             }
1591
1592           if (0 == error0 && is_request0)
1593             {
1594               u32 this_addr = clib_net_to_host_u32
1595                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1596
1597               vec_foreach (pa, am->proxy_arps)
1598               {
1599                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr.as_u32);
1600                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr.as_u32);
1601
1602                 /* an ARP request hit in the proxy-arp table? */
1603                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1604                     (fib_index0 == pa->fib_index))
1605                   {
1606                     proxy_src.as_u32 =
1607                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1608
1609                     /*
1610                      * change the interface address to the proxied
1611                      */
1612                     n_arp_replies_sent++;
1613
1614                     next0 =
1615                       arp_mk_reply (vnm, p0, sw_if_index0, &proxy_src, arp0,
1616                                     eth_rx);
1617                   }
1618               }
1619             }
1620           else
1621             {
1622               p0->error = node->errors[error0];
1623             }
1624
1625           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1626                                            n_left_to_next, pi0, next0);
1627         }
1628
1629       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1630     }
1631
1632   vlib_error_count (vm, node->node_index,
1633                     ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
1634
1635   return frame->n_vectors;
1636 }
1637
1638 static char *ethernet_arp_error_strings[] = {
1639 #define _(sym,string) string,
1640   foreach_ethernet_arp_error
1641 #undef _
1642 };
1643
1644 /* *INDENT-OFF* */
1645
1646 VLIB_REGISTER_NODE (arp_input_node, static) =
1647 {
1648   .function = arp_input,
1649   .name = "arp-input",
1650   .vector_size = sizeof (u32),
1651   .n_errors = ETHERNET_ARP_N_ERROR,
1652   .error_strings = ethernet_arp_error_strings,
1653   .n_next_nodes = ARP_INPUT_N_NEXT,
1654   .next_nodes = {
1655     [ARP_INPUT_NEXT_DROP] = "error-drop",
1656     [ARP_INPUT_NEXT_DISABLED] = "arp-disabled",
1657   },
1658   .format_buffer = format_ethernet_arp_header,
1659   .format_trace = format_ethernet_arp_input_trace,
1660 };
1661
1662 VLIB_REGISTER_NODE (arp_disabled_node, static) =
1663 {
1664   .function = arp_disabled,
1665   .name = "arp-disabled",
1666   .vector_size = sizeof (u32),
1667   .n_errors = ARP_DISABLED_N_ERROR,
1668   .error_strings = arp_disabled_error_strings,
1669   .n_next_nodes = ARP_DISABLED_N_NEXT,
1670   .next_nodes = {
1671     [ARP_INPUT_NEXT_DROP] = "error-drop",
1672   },
1673   .format_buffer = format_ethernet_arp_header,
1674   .format_trace = format_ethernet_arp_input_trace,
1675 };
1676
1677 VLIB_REGISTER_NODE (arp_reply_node, static) =
1678 {
1679   .function = arp_reply,
1680   .name = "arp-reply",
1681   .vector_size = sizeof (u32),
1682   .n_errors = ETHERNET_ARP_N_ERROR,
1683   .error_strings = ethernet_arp_error_strings,
1684   .n_next_nodes = ARP_REPLY_N_NEXT,
1685   .next_nodes = {
1686     [ARP_REPLY_NEXT_DROP] = "error-drop",
1687     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1688   },
1689   .format_buffer = format_ethernet_arp_header,
1690   .format_trace = format_ethernet_arp_input_trace,
1691 };
1692
1693 VLIB_REGISTER_NODE (arp_proxy_node, static) =
1694 {
1695   .function = arp_proxy,
1696   .name = "arp-proxy",
1697   .vector_size = sizeof (u32),
1698   .n_errors = ETHERNET_ARP_N_ERROR,
1699   .error_strings = ethernet_arp_error_strings,
1700   .n_next_nodes = ARP_REPLY_N_NEXT,
1701   .next_nodes = {
1702     [ARP_REPLY_NEXT_DROP] = "error-drop",
1703     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1704   },
1705   .format_buffer = format_ethernet_arp_header,
1706   .format_trace = format_ethernet_arp_input_trace,
1707 };
1708
1709 /* Built-in ARP rx feature path definition */
1710 VNET_FEATURE_ARC_INIT (arp_feat, static) =
1711 {
1712   .arc_name = "arp",
1713   .start_nodes = VNET_FEATURES ("arp-input"),
1714   .last_in_arc = "error-drop",
1715   .arc_index_ptr = &ethernet_arp_main.feature_arc_index,
1716 };
1717
1718 VNET_FEATURE_INIT (arp_reply_feat_node, static) =
1719 {
1720   .arc_name = "arp",
1721   .node_name = "arp-reply",
1722   .runs_before = VNET_FEATURES ("arp-disabled"),
1723 };
1724
1725 VNET_FEATURE_INIT (arp_proxy_feat_node, static) =
1726 {
1727   .arc_name = "arp",
1728   .node_name = "arp-proxy",
1729   .runs_after = VNET_FEATURES ("arp-reply"),
1730   .runs_before = VNET_FEATURES ("arp-disabled"),
1731 };
1732
1733 VNET_FEATURE_INIT (arp_drop_feat_node, static) =
1734 {
1735   .arc_name = "arp",
1736   .node_name = "arp-disabled",
1737   .runs_before = 0,     /* last feature */
1738 };
1739
1740 /* *INDENT-ON* */
1741
1742 static int
1743 ip4_arp_entry_sort (void *a1, void *a2)
1744 {
1745   ethernet_arp_ip4_entry_t *e1 = a1;
1746   ethernet_arp_ip4_entry_t *e2 = a2;
1747
1748   int cmp;
1749   vnet_main_t *vnm = vnet_get_main ();
1750
1751   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1752   if (!cmp)
1753     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1754   return cmp;
1755 }
1756
1757 ethernet_arp_ip4_entry_t *
1758 ip4_neighbors_pool (void)
1759 {
1760   ethernet_arp_main_t *am = &ethernet_arp_main;
1761   return am->ip4_entry_pool;
1762 }
1763
1764 ethernet_arp_ip4_entry_t *
1765 ip4_neighbor_entries (u32 sw_if_index)
1766 {
1767   ethernet_arp_main_t *am = &ethernet_arp_main;
1768   ethernet_arp_ip4_entry_t *n, *ns = 0;
1769
1770   /* *INDENT-OFF* */
1771   pool_foreach (n, am->ip4_entry_pool, ({
1772     if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
1773       continue;
1774     vec_add1 (ns, n[0]);
1775   }));
1776   /* *INDENT-ON* */
1777
1778   if (ns)
1779     vec_sort_with_function (ns, ip4_arp_entry_sort);
1780   return ns;
1781 }
1782
1783 static clib_error_t *
1784 show_ip4_arp (vlib_main_t * vm,
1785               unformat_input_t * input, vlib_cli_command_t * cmd)
1786 {
1787   vnet_main_t *vnm = vnet_get_main ();
1788   ethernet_arp_main_t *am = &ethernet_arp_main;
1789   ethernet_arp_ip4_entry_t *e, *es;
1790   ethernet_proxy_arp_t *pa;
1791   clib_error_t *error = 0;
1792   u32 sw_if_index;
1793
1794   /* Filter entries by interface if given. */
1795   sw_if_index = ~0;
1796   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1797
1798   es = ip4_neighbor_entries (sw_if_index);
1799   if (es)
1800     {
1801       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1802       vec_foreach (e, es)
1803       {
1804         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1805       }
1806       vec_free (es);
1807     }
1808
1809   if (vec_len (am->proxy_arps))
1810     {
1811       vlib_cli_output (vm, "Proxy arps enabled for:");
1812       vec_foreach (pa, am->proxy_arps)
1813       {
1814         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1815                          pa->fib_index,
1816                          format_ip4_address, &pa->lo_addr,
1817                          format_ip4_address, &pa->hi_addr);
1818       }
1819     }
1820
1821   return error;
1822 }
1823
1824 /*?
1825  * Display all the IPv4 ARP entries.
1826  *
1827  * @cliexpar
1828  * Example of how to display the IPv4 ARP table:
1829  * @cliexstart{show ip arp}
1830  *    Time      FIB        IP4       Flags      Ethernet              Interface
1831  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1832  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1833  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1834  * Proxy arps enabled for:
1835  * Fib_index 0   6.0.0.1 - 6.0.0.11
1836  * @cliexend
1837  ?*/
1838 /* *INDENT-OFF* */
1839 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1840   .path = "show ip arp",
1841   .function = show_ip4_arp,
1842   .short_help = "show ip arp",
1843 };
1844 /* *INDENT-ON* */
1845
1846 typedef struct
1847 {
1848   pg_edit_t l2_type, l3_type;
1849   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1850   pg_edit_t opcode;
1851   struct
1852   {
1853     pg_edit_t mac;
1854     pg_edit_t ip4;
1855   } ip4_over_ethernet[2];
1856 } pg_ethernet_arp_header_t;
1857
1858 static inline void
1859 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1860 {
1861   /* Initialize fields that are not bit fields in the IP header. */
1862 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1863   _(l2_type);
1864   _(l3_type);
1865   _(n_l2_address_bytes);
1866   _(n_l3_address_bytes);
1867   _(opcode);
1868   _(ip4_over_ethernet[0].mac);
1869   _(ip4_over_ethernet[0].ip4);
1870   _(ip4_over_ethernet[1].mac);
1871   _(ip4_over_ethernet[1].ip4);
1872 #undef _
1873 }
1874
1875 uword
1876 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1877 {
1878   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1879   pg_ethernet_arp_header_t *p;
1880   u32 group_index;
1881
1882   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1883                             &group_index);
1884   pg_ethernet_arp_header_init (p);
1885
1886   /* Defaults. */
1887   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1888   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1889   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1890   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1891
1892   if (!unformat (input, "%U: %U/%U -> %U/%U",
1893                  unformat_pg_edit,
1894                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1895                  unformat_pg_edit,
1896                  unformat_mac_address_t, &p->ip4_over_ethernet[0].mac,
1897                  unformat_pg_edit,
1898                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1899                  unformat_pg_edit,
1900                  unformat_mac_address_t, &p->ip4_over_ethernet[1].mac,
1901                  unformat_pg_edit,
1902                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1903     {
1904       /* Free up any edits we may have added. */
1905       pg_free_edit_group (s);
1906       return 0;
1907     }
1908   return 1;
1909 }
1910
1911 clib_error_t *
1912 ip4_set_arp_limit (u32 arp_limit)
1913 {
1914   ethernet_arp_main_t *am = &ethernet_arp_main;
1915
1916   am->limit_arp_cache_size = arp_limit;
1917   return 0;
1918 }
1919
1920 /**
1921  * @brief Control Plane hook to remove an ARP entry
1922  */
1923 int
1924 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1925                                   u32 sw_if_index,
1926                                   const
1927                                   ethernet_arp_ip4_over_ethernet_address_t *
1928                                   a)
1929 {
1930   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1931     .sw_if_index = sw_if_index,
1932     .flags = ETHERNET_ARP_ARGS_REMOVE,
1933     .ip4 = a->ip4,
1934     .mac = a->mac,
1935   };
1936
1937   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1938                                (u8 *) & args, sizeof (args));
1939   return 0;
1940 }
1941
1942 /**
1943  * @brief publish wildcard arp event
1944  * @param sw_if_index The interface on which the ARP entries are acted
1945  */
1946 static int
1947 vnet_arp_wc_publish (u32 sw_if_index,
1948                      const ethernet_arp_ip4_over_ethernet_address_t * a)
1949 {
1950   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1951     .flags = ETHERNET_ARP_ARGS_WC_PUB,
1952     .sw_if_index = sw_if_index,
1953     .ip4 = a->ip4,
1954     .mac = a->mac,
1955   };
1956
1957   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1958                                (u8 *) & args, sizeof (args));
1959   return 0;
1960 }
1961
1962 static void
1963 vnet_arp_wc_publish_internal (vnet_main_t * vnm,
1964                               vnet_arp_set_ip4_over_ethernet_rpc_args_t *
1965                               args)
1966 {
1967   vlib_main_t *vm = vlib_get_main ();
1968   ethernet_arp_main_t *am = &ethernet_arp_main;
1969   uword ni = am->wc_ip4_arp_publisher_node;
1970   uword et = am->wc_ip4_arp_publisher_et;
1971
1972   if (ni == (uword) ~ 0)
1973     return;
1974   wc_arp_report_t *r =
1975     vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r);
1976   r->ip.as_u32 = args->ip4.as_u32;
1977   r->sw_if_index = args->sw_if_index;
1978   mac_address_copy (&r->mac, &args->mac);
1979 }
1980
1981 void
1982 wc_arp_set_publisher_node (uword node_index, uword event_type)
1983 {
1984   ethernet_arp_main_t *am = &ethernet_arp_main;
1985   am->wc_ip4_arp_publisher_node = node_index;
1986   am->wc_ip4_arp_publisher_et = event_type;
1987 }
1988
/*
 * Forward declaration: arp_entry_free is defined further down in this
 * file but is already needed by the flush routine that follows.
 */
static void
arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e);
1991
1992 static int
1993 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1994                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1995                                            * args)
1996 {
1997   ethernet_arp_main_t *am = &ethernet_arp_main;
1998   ethernet_arp_ip4_entry_t *e;
1999   ethernet_arp_interface_t *eai;
2000
2001   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
2002     return 0;
2003
2004   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2005
2006   e = arp_entry_find (eai, &args->ip4);
2007
2008   if (NULL != e)
2009     {
2010       adj_nbr_walk_nh4 (e->sw_if_index,
2011                         &e->ip4_address, arp_mk_incomplete_walk, e);
2012
2013       /*
2014        * The difference between flush and unset, is that an unset
2015        * means delete for static and dynamic entries. A flush
2016        * means delete only for dynamic. Flushing is what the DP
2017        * does in response to interface events. unset is only done
2018        * by the control plane.
2019        */
2020       if (e->flags & IP_NEIGHBOR_FLAG_STATIC)
2021         {
2022           e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
2023         }
2024       else if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC)
2025         {
2026           arp_entry_free (eai, e);
2027         }
2028     }
2029   return (0);
2030 }
2031
2032 /*
2033  * arp_add_del_interface_address
2034  *
2035  * callback when an interface address is added or deleted
2036  */
2037 static void
2038 arp_enable_disable_interface (ip4_main_t * im,
2039                               uword opaque, u32 sw_if_index, u32 is_enable)
2040 {
2041   ethernet_arp_main_t *am = &ethernet_arp_main;
2042
2043   if (is_enable)
2044     arp_enable (am, sw_if_index);
2045   else
2046     arp_disable (am, sw_if_index);
2047 }
2048
2049 /*
2050  * arp_add_del_interface_address
2051  *
2052  * callback when an interface address is added or deleted
2053  */
2054 static void
2055 arp_add_del_interface_address (ip4_main_t * im,
2056                                uword opaque,
2057                                u32 sw_if_index,
2058                                ip4_address_t * address,
2059                                u32 address_length,
2060                                u32 if_address_index, u32 is_del)
2061 {
2062   /*
2063    * Flush the ARP cache of all entries covered by the address
2064    * that is being removed.
2065    */
2066   ethernet_arp_main_t *am = &ethernet_arp_main;
2067   ethernet_arp_ip4_entry_t *e;
2068
2069   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
2070     return;
2071
2072   if (is_del)
2073     {
2074       ethernet_arp_interface_t *eai;
2075       u32 i, *to_delete = 0;
2076       hash_pair_t *pair;
2077
2078       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2079
2080       /* *INDENT-OFF* */
2081       hash_foreach_pair (pair, eai->arp_entries,
2082       ({
2083         e = pool_elt_at_index(am->ip4_entry_pool,
2084                               pair->value[0]);
2085         if (ip4_destination_matches_route (im, &e->ip4_address,
2086                                            address, address_length))
2087           {
2088             vec_add1 (to_delete, e - am->ip4_entry_pool);
2089           }
2090       }));
2091       /* *INDENT-ON* */
2092
2093       for (i = 0; i < vec_len (to_delete); i++)
2094         {
2095           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
2096
2097           vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
2098             .ip4.as_u32 = e->ip4_address.as_u32,
2099             .sw_if_index = e->sw_if_index,
2100             .flags = ETHERNET_ARP_ARGS_FLUSH,
2101           };
2102           mac_address_copy (&delme.mac, &e->mac);
2103
2104           vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (),
2105                                                      &delme);
2106         }
2107
2108       vec_free (to_delete);
2109     }
2110 }
2111
2112 static void
2113 arp_table_bind (ip4_main_t * im,
2114                 uword opaque,
2115                 u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2116 {
2117   ethernet_arp_main_t *am = &ethernet_arp_main;
2118   ethernet_arp_interface_t *eai;
2119   ethernet_arp_ip4_entry_t *e;
2120   hash_pair_t *pair;
2121
2122   /*
2123    * the IP table that the interface is bound to has changed.
2124    * reinstall all the adj fibs.
2125    */
2126
2127   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
2128     return;
2129
2130   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2131
2132   /* *INDENT-OFF* */
2133   hash_foreach_pair (pair, eai->arp_entries,
2134   ({
2135     e = pool_elt_at_index(am->ip4_entry_pool,
2136                           pair->value[0]);
2137     /*
2138      * remove the adj-fib from the old table and add to the new
2139      */
2140     arp_adj_fib_remove(e, old_fib_index);
2141     arp_adj_fib_add(e, new_fib_index);
2142   }));
2143   /* *INDENT-ON* */
2144
2145 }
2146
2147 static clib_error_t *
2148 ethernet_arp_init (vlib_main_t * vm)
2149 {
2150   ethernet_arp_main_t *am = &ethernet_arp_main;
2151   ip4_main_t *im = &ip4_main;
2152   pg_node_t *pn;
2153
2154   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
2155
2156   pn = pg_get_node (arp_input_node.index);
2157   pn->unformat_edit = unformat_pg_arp_header;
2158
2159   am->opcode_by_name = hash_create_string (0, sizeof (uword));
2160 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
2161   foreach_ethernet_arp_opcode;
2162 #undef _
2163
2164   /* $$$ configurable */
2165   am->limit_arp_cache_size = 50000;
2166
2167   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
2168   am->mac_changes_by_address = hash_create (0, sizeof (uword));
2169   am->wc_ip4_arp_publisher_node = (uword) ~ 0;
2170
2171   /* don't trace ARP error packets */
2172   {
2173     vlib_node_runtime_t *rt =
2174       vlib_node_get_runtime (vm, arp_input_node.index);
2175
2176 #define _(a,b)                                  \
2177     vnet_pcap_drop_trace_filter_add_del         \
2178         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
2179          1 /* is_add */);
2180     foreach_ethernet_arp_error
2181 #undef _
2182   }
2183
2184   ip4_add_del_interface_address_callback_t cb;
2185   cb.function = arp_add_del_interface_address;
2186   cb.function_opaque = 0;
2187   vec_add1 (im->add_del_interface_address_callbacks, cb);
2188
2189   ip4_enable_disable_interface_callback_t cbe;
2190   cbe.function = arp_enable_disable_interface;
2191   cbe.function_opaque = 0;
2192   vec_add1 (im->enable_disable_interface_callbacks, cbe);
2193
2194   ip4_table_bind_callback_t cbt;
2195   cbt.function = arp_table_bind;
2196   cbt.function_opaque = 0;
2197   vec_add1 (im->table_bind_callbacks, cbt);
2198
2199   return 0;
2200 }
/* Register ethernet_arp_init as an init function, ordered after "ethernet_init". */
/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_arp_init) =
{
  .runs_after = VLIB_INITS("ethernet_init"),
};
/* *INDENT-ON* */
2207
2208 static void
2209 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
2210 {
2211   ethernet_arp_main_t *am = &ethernet_arp_main;
2212
2213   arp_adj_fib_remove
2214     (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
2215   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
2216   pool_put (am->ip4_entry_pool, e);
2217 }
2218
2219 static inline int
2220 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
2221                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
2222                                            * args)
2223 {
2224   ethernet_arp_main_t *am = &ethernet_arp_main;
2225   ethernet_arp_ip4_entry_t *e;
2226   ethernet_arp_interface_t *eai;
2227
2228   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
2229     return 0;
2230
2231   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2232
2233   e = arp_entry_find (eai, &args->ip4);
2234
2235   if (NULL != e)
2236     {
2237       adj_nbr_walk_nh4 (e->sw_if_index,
2238                         &e->ip4_address, arp_mk_incomplete_walk, e);
2239       arp_entry_free (eai, e);
2240     }
2241
2242   return 0;
2243 }
2244
2245
2246 static int
2247 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
2248                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
2249                                               * args)
2250 {
2251   ethernet_arp_main_t *am = &ethernet_arp_main;
2252   ethernet_arp_ip4_entry_t *e;
2253   ethernet_arp_interface_t *eai;
2254
2255   arp_enable (am, args->sw_if_index);
2256   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2257
2258   e = arp_entry_find (eai, &args->ip4);
2259
2260   if (NULL != e)
2261     {
2262       adj_nbr_walk_nh4 (e->sw_if_index,
2263                         &e->ip4_address, arp_mk_complete_walk, e);
2264     }
2265   return (0);
2266 }
2267
2268 static void
2269 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
2270                                     * a)
2271 {
2272   vnet_main_t *vm = vnet_get_main ();
2273   ASSERT (vlib_get_thread_index () == 0);
2274
2275   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
2276     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
2277   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
2278     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
2279   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
2280     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
2281   else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB)
2282     vnet_arp_wc_publish_internal (vm, a);
2283   else
2284     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
2285 }
2286
2287 /**
2288  * @brief Invoked when the interface's admin state changes
2289  */
2290 static clib_error_t *
2291 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
2292                                    u32 sw_if_index, u32 flags)
2293 {
2294   ethernet_arp_main_t *am = &ethernet_arp_main;
2295   ethernet_arp_ip4_entry_t *e;
2296   u32 i, *to_update = 0;
2297
2298   /* *INDENT-OFF* */
2299   pool_foreach (e, am->ip4_entry_pool,
2300   ({
2301     if (e->sw_if_index == sw_if_index)
2302       vec_add1 (to_update,
2303                 e - am->ip4_entry_pool);
2304   }));
2305   /* *INDENT-ON* */
2306
2307   for (i = 0; i < vec_len (to_update); i++)
2308     {
2309       e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
2310
2311       vnet_arp_set_ip4_over_ethernet_rpc_args_t update_me = {
2312         .ip4.as_u32 = e->ip4_address.as_u32,
2313         .sw_if_index = e->sw_if_index,
2314       };
2315       mac_address_copy (&update_me.mac, &e->mac);
2316
2317       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
2318         {
2319           update_me.flags = ETHERNET_ARP_ARGS_POPULATE;
2320           vnet_arp_populate_ip4_over_ethernet_internal (vnm, &update_me);
2321         }
2322       else
2323         {
2324           update_me.flags = ETHERNET_ARP_ARGS_FLUSH;
2325           vnet_arp_flush_ip4_over_ethernet_internal (vnm, &update_me);
2326         }
2327     }
2328   vec_free (to_update);
2329
2330   return 0;
2331 }
2332
2333 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
2334
2335 static void
2336 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
2337 {
2338   u8 old;
2339   int i;
2340
2341   for (i = 3; i >= 0; i--)
2342     {
2343       old = a->ip4.as_u8[i];
2344       a->ip4.as_u8[i] += 1;
2345       if (old < a->ip4.as_u8[i])
2346         break;
2347     }
2348
2349   for (i = 5; i >= 0; i--)
2350     {
2351       old = a->mac.bytes[i];
2352       a->mac.bytes[i] += 1;
2353       if (old < a->mac.bytes[i])
2354         break;
2355     }
2356 }
2357
2358 int
2359 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
2360                                 u32 sw_if_index,
2361                                 const ethernet_arp_ip4_over_ethernet_address_t
2362                                 * a, ip_neighbor_flags_t flags)
2363 {
2364   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
2365     .sw_if_index = sw_if_index,
2366     .nbr_flags = flags,
2367     .flags = 0,
2368     .ip4.as_u32 = a->ip4.as_u32,
2369     .mac = a->mac,
2370   };
2371
2372   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
2373                                (u8 *) & args, sizeof (args));
2374   return 0;
2375 }
2376
2377 void
2378 proxy_arp_walk (proxy_arp_walk_t cb, void *data)
2379 {
2380   ethernet_arp_main_t *am = &ethernet_arp_main;
2381   ethernet_proxy_arp_t *pa;
2382
2383   vec_foreach (pa, am->proxy_arps)
2384   {
2385     if (!cb (&pa->lo_addr, &pa->hi_addr, pa->fib_index, data))
2386       break;
2387   }
2388 }
2389
2390 int
2391 vnet_proxy_arp_enable_disable (vnet_main_t * vnm, u32 sw_if_index, u8 enable)
2392 {
2393   ethernet_arp_main_t *am = &ethernet_arp_main;
2394   ethernet_arp_interface_t *eai;
2395
2396   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
2397
2398   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2399
2400   if (enable)
2401     {
2402       if (!eai->proxy_enabled)
2403         {
2404           vnet_feature_enable_disable ("arp", "arp-proxy",
2405                                        sw_if_index, 1, NULL, 0);
2406         }
2407       eai->proxy_enabled = 1;
2408     }
2409   else
2410     {
2411       if (eai->proxy_enabled)
2412         {
2413           vnet_feature_enable_disable ("arp", "arp-proxy",
2414                                        sw_if_index, 0, NULL, 0);
2415         }
2416       eai->proxy_enabled = 0;
2417     }
2418
2419   return (0);
2420 }
2421
2422 int
2423 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
2424                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
2425 {
2426   ethernet_arp_main_t *am = &ethernet_arp_main;
2427   ethernet_proxy_arp_t *pa;
2428   u32 found_at_index = ~0;
2429
2430   vec_foreach (pa, am->proxy_arps)
2431   {
2432     if (pa->lo_addr.as_u32 == lo_addr->as_u32 &&
2433         pa->hi_addr.as_u32 == hi_addr->as_u32 && pa->fib_index == fib_index)
2434       {
2435         found_at_index = pa - am->proxy_arps;
2436         break;
2437       }
2438   }
2439
2440   if (found_at_index != ~0)
2441     {
2442       /* Delete, otherwise it's already in the table */
2443       if (is_del)
2444         vec_delete (am->proxy_arps, 1, found_at_index);
2445       return 0;
2446     }
2447   /* delete, no such entry */
2448   if (is_del)
2449     return VNET_API_ERROR_NO_SUCH_ENTRY;
2450
2451   /* add, not in table */
2452   vec_add2 (am->proxy_arps, pa, 1);
2453   pa->lo_addr.as_u32 = lo_addr->as_u32;
2454   pa->hi_addr.as_u32 = hi_addr->as_u32;
2455   pa->fib_index = fib_index;
2456   return 0;
2457 }
2458
2459 void
2460 proxy_arp_intfc_walk (proxy_arp_intf_walk_t cb, void *data)
2461 {
2462   ethernet_arp_main_t *am = &ethernet_arp_main;
2463   ethernet_arp_interface_t *eai;
2464
2465   vec_foreach (eai, am->ethernet_arp_by_sw_if_index)
2466   {
2467     if (eai->proxy_enabled)
2468       cb (eai - am->ethernet_arp_by_sw_if_index, data);
2469   }
2470 }
2471
2472 /*
2473  * Remove any proxy arp entries associated with the
2474  * specified fib.
2475  */
2476 int
2477 vnet_proxy_arp_fib_reset (u32 fib_id)
2478 {
2479   ethernet_arp_main_t *am = &ethernet_arp_main;
2480   ethernet_proxy_arp_t *pa;
2481   u32 *entries_to_delete = 0;
2482   u32 fib_index;
2483   int i;
2484
2485   fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
2486   if (~0 == fib_index)
2487     return VNET_API_ERROR_NO_SUCH_ENTRY;
2488
2489   vec_foreach (pa, am->proxy_arps)
2490   {
2491     if (pa->fib_index == fib_index)
2492       {
2493         vec_add1 (entries_to_delete, pa - am->proxy_arps);
2494       }
2495   }
2496
2497   for (i = 0; i < vec_len (entries_to_delete); i++)
2498     {
2499       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
2500     }
2501
2502   vec_free (entries_to_delete);
2503
2504   return 0;
2505 }
2506
2507 static clib_error_t *
2508 ip_arp_add_del_command_fn (vlib_main_t * vm,
2509                            unformat_input_t * input, vlib_cli_command_t * cmd)
2510 {
2511   vnet_main_t *vnm = vnet_get_main ();
2512   u32 sw_if_index;
2513   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
2514   int addr_valid = 0;
2515   int is_del = 0;
2516   int count = 1;
2517   u32 fib_index = 0;
2518   u32 fib_id;
2519   int is_proxy = 0;
2520   ip_neighbor_flags_t flags;
2521
2522   flags = IP_NEIGHBOR_FLAG_NONE;
2523
2524   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2525     {
2526       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
2527       if (unformat (input, "%U %U %U",
2528                     unformat_vnet_sw_interface, vnm, &sw_if_index,
2529                     unformat_ip4_address, &addr.ip4,
2530                     unformat_mac_address_t, &addr.mac))
2531         addr_valid = 1;
2532
2533       else if (unformat (input, "delete") || unformat (input, "del"))
2534         is_del = 1;
2535
2536       else if (unformat (input, "static"))
2537         flags |= IP_NEIGHBOR_FLAG_STATIC;
2538
2539       else if (unformat (input, "no-fib-entry"))
2540         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
2541
2542       else if (unformat (input, "count %d", &count))
2543         ;
2544
2545       else if (unformat (input, "fib-id %d", &fib_id))
2546         {
2547           fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
2548
2549           if (~0 == fib_index)
2550             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
2551         }
2552
2553       else if (unformat (input, "proxy %U - %U",
2554                          unformat_ip4_address, &lo_addr.ip4,
2555                          unformat_ip4_address, &hi_addr.ip4))
2556         is_proxy = 1;
2557       else
2558         break;
2559     }
2560
2561   if (is_proxy)
2562     {
2563       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
2564                                      fib_index, is_del);
2565       return 0;
2566     }
2567
2568   if (addr_valid)
2569     {
2570       int i;
2571
2572       for (i = 0; i < count; i++)
2573         {
2574           if (is_del == 0)
2575             {
2576               uword event_type, *event_data = 0;
2577
2578               /* Park the debug CLI until the arp entry is installed */
2579               vnet_register_ip4_arp_resolution_event
2580                 (vnm, &addr.ip4, vlib_current_process (vm),
2581                  1 /* type */ , 0 /* data */ );
2582
2583               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, &addr, flags);
2584
2585               vlib_process_wait_for_event (vm);
2586               event_type = vlib_process_get_events (vm, &event_data);
2587               vec_reset_length (event_data);
2588               if (event_type != 1)
2589                 clib_warning ("event type %d unexpected", event_type);
2590             }
2591           else
2592             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
2593
2594           increment_ip4_and_mac_address (&addr);
2595         }
2596     }
2597   else
2598     {
2599       return clib_error_return (0, "unknown input `%U'",
2600                                 format_unformat_error, input);
2601     }
2602
2603   return 0;
2604 }
2605
2606 /* *INDENT-OFF* */
2607 /*?
2608  * Add or delete IPv4 ARP cache entries.
2609  *
2610  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
2611  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
2612  * any order and combination.
2613  *
2614  * @cliexpar
2615  * @parblock
2616  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
2617  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
2618  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2619  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
2620  *
2621  * To add or delete an IPv4 ARP cache entry to or from a specific fib
2622  * table:
2623  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2624  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2625  *
2626  * Add or delete IPv4 static ARP cache entries as follows:
2627  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2628  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2629  *
2630  * For testing / debugging purposes, the 'set ip arp' command can add or
2631  * delete multiple entries. Supply the 'count N' parameter:
2632  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2633  * @endparblock
2634  ?*/
2635 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
2636   .path = "set ip arp",
2637   .short_help =
2638   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
2639   .function = ip_arp_add_del_command_fn,
2640 };
2641 /* *INDENT-ON* */
2642
2643 static clib_error_t *
2644 set_int_proxy_arp_command_fn (vlib_main_t * vm,
2645                               unformat_input_t *
2646                               input, vlib_cli_command_t * cmd)
2647 {
2648   vnet_main_t *vnm = vnet_get_main ();
2649   u32 sw_if_index;
2650   int enable = 0;
2651
2652   sw_if_index = ~0;
2653
2654   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2655     {
2656       if (unformat (input, "%U", unformat_vnet_sw_interface,
2657                     vnm, &sw_if_index))
2658         ;
2659       else if (unformat (input, "enable") || unformat (input, "on"))
2660         enable = 1;
2661       else if (unformat (input, "disable") || unformat (input, "off"))
2662         enable = 0;
2663       else
2664         break;
2665     }
2666
2667   if (~0 == sw_if_index)
2668     return clib_error_return (0, "unknown input '%U'",
2669                               format_unformat_error, input);
2670
2671   vnet_proxy_arp_enable_disable (vnm, sw_if_index, enable);
2672
2673   return 0;
2674 }
2675
2676 /* *INDENT-OFF* */
2677 /*?
2678  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2679  * requests for the indicated address range. Multiple proxy-arp
2680  * ranges may be provisioned.
2681  *
2682  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2683  * Also, the underlying implementation has not been performance-tuned.
2684  * Avoid creating an unnecessarily large set of ranges.
2685  *
2686  * @cliexpar
2687  * To enable proxy arp on a range of addresses, use:
2688  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2689  * Append 'del' to delete a range of proxy ARP addresses:
2690  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2691  * You must then specifically enable proxy arp on individual interfaces:
2692  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2693  * To disable proxy arp on an individual interface:
2694  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2695  ?*/
2696 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2697   .path = "set interface proxy-arp",
2698   .short_help =
2699   "set interface proxy-arp <intfc> [enable|disable]",
2700   .function = set_int_proxy_arp_command_fn,
2701 };
2702 /* *INDENT-ON* */
2703
2704
/*
 * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
 * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
 *
 * Next-node slots of the termination node.
 */
typedef enum
{
  ARP_TERM_NEXT_L2_OUTPUT = 0,  /* NOTE(review): presumably the L2 output path */
  ARP_TERM_NEXT_DROP,           /* NOTE(review): presumably the drop path */
  ARP_TERM_N_NEXT,              /* number of next slots, not a slot itself */
} arp_term_next_t;
2715
/*
 * NOTE(review): presumably one next-node index per L2 input feature bit
 * (32 slots); its initialization is not visible here - confirm.
 */
u32 arp_term_next_node_index[32];
2717
2718 static uword
2719 arp_term_l2bd (vlib_main_t * vm,
2720                vlib_node_runtime_t * node, vlib_frame_t * frame)
2721 {
2722   l2input_main_t *l2im = &l2input_main;
2723   u32 n_left_from, next_index, *from, *to_next;
2724   u32 n_replies_sent = 0;
2725   u16 last_bd_index = ~0;
2726   l2_bridge_domain_t *last_bd_config = 0;
2727   l2_input_config_t *cfg0;
2728
2729   from = vlib_frame_vector_args (frame);
2730   n_left_from = frame->n_vectors;
2731   next_index = node->cached_next_index;
2732
2733   while (n_left_from > 0)
2734     {
2735       u32 n_left_to_next;
2736
2737       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2738
2739       while (n_left_from > 0 && n_left_to_next > 0)
2740         {
2741           vlib_buffer_t *p0;
2742           ethernet_header_t *eth0;
2743           ethernet_arp_header_t *arp0;
2744           ip6_header_t *iph0;
2745           u8 *l3h0;
2746           u32 pi0, error0, next0, sw_if_index0;
2747           u16 ethertype0;
2748           u16 bd_index0;
2749           u32 ip0;
2750           u8 *macp0;
2751
2752           pi0 = from[0];
2753           to_next[0] = pi0;
2754           from += 1;
2755           to_next += 1;
2756           n_left_from -= 1;
2757           n_left_to_next -= 1;
2758
2759           p0 = vlib_get_buffer (vm, pi0);
2760           // Terminate only local (SHG == 0) ARP
2761           if (vnet_buffer (p0)->l2.shg != 0)
2762             goto next_l2_feature;
2763
2764           eth0 = vlib_buffer_get_current (p0);
2765           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2766           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2767           arp0 = (ethernet_arp_header_t *) l3h0;
2768
2769           if (ethertype0 != ETHERNET_TYPE_ARP)
2770             goto check_ip6_nd;
2771
2772           if ((arp0->opcode !=
2773                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
2774               (arp0->opcode !=
2775                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
2776             goto check_ip6_nd;
2777
2778           /* Must be ARP request/reply packet here */
2779           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2780                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2781             {
2782               u8 *t0 = vlib_add_trace (vm, node, p0,
2783                                        sizeof (ethernet_arp_input_trace_t));
2784               clib_memcpy_fast (t0, l3h0,
2785                                 sizeof (ethernet_arp_input_trace_t));
2786             }
2787
2788           error0 = 0;
2789           error0 =
2790             (arp0->l2_type !=
2791              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2792              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2793           error0 =
2794             (arp0->l3_type !=
2795              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2796              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2797
2798           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2799
2800           if (error0)
2801             goto drop;
2802
2803           /* Trash ARP packets whose ARP-level source addresses do not
2804              match, or if requester address is mcast */
2805           if (PREDICT_FALSE
2806               (!ethernet_mac_address_equal (eth0->src_address,
2807                                             arp0->ip4_over_ethernet[0].
2808                                             mac.bytes))
2809               || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
2810             {
2811               /* VRRP virtual MAC may be different to SMAC in ARP reply */
2812               if (!ethernet_mac_address_equal
2813                   (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix))
2814                 {
2815                   error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2816                   goto drop;
2817                 }
2818             }
2819           if (PREDICT_FALSE
2820               (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
2821             {
2822               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
2823               goto drop;
2824             }
2825
2826           /* Check if anyone want ARP request events for L2 BDs */
2827           {
2828             ethernet_arp_main_t *am = &ethernet_arp_main;
2829             if (am->wc_ip4_arp_publisher_node != (uword) ~ 0)
2830               vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]);
2831           }
2832
2833           /* lookup BD mac_by_ip4 hash table for MAC entry */
2834           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2835           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2836           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2837                              || (last_bd_index == (u16) ~ 0)))
2838             {
2839               last_bd_index = bd_index0;
2840               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2841             }
2842           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2843
2844           if (PREDICT_FALSE (!macp0))
2845             goto next_l2_feature;       /* MAC not found */
2846           if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2847                              arp0->ip4_over_ethernet[1].ip4.as_u32))
2848             goto next_l2_feature;       /* GARP */
2849
2850           /* MAC found, send ARP reply -
2851              Convert ARP request packet to ARP reply */
2852           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2853           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2854           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2855           mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
2856           clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
2857           clib_memcpy_fast (eth0->src_address, macp0, 6);
2858           n_replies_sent += 1;
2859
2860         output_response:
2861           /* For BVI, need to use l2-fwd node to send ARP reply as
2862              l2-output node cannot output packet to BVI properly */
2863           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2864           if (PREDICT_FALSE (cfg0->bvi))
2865             {
2866               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2867               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2868               goto next_l2_feature;
2869             }
2870
2871           /* Send ARP/ND reply back out input interface through l2-output */
2872           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2873           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2874           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2875                                            to_next, n_left_to_next, pi0,
2876                                            next0);
2877           continue;
2878
2879         check_ip6_nd:
2880           /* IP6 ND event notification or solicitation handling to generate
2881              local response instead of flooding */
2882           iph0 = (ip6_header_t *) l3h0;
2883           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2884                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2885                              !ip6_address_is_unspecified
2886                              (&iph0->src_address)))
2887             {
2888               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2889               if (vnet_ip6_nd_term
2890                   (vm, node, p0, eth0, iph0, sw_if_index0,
2891                    vnet_buffer (p0)->l2.bd_index))
2892                 goto output_response;
2893             }
2894
2895         next_l2_feature:
2896           {
2897             next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
2898                                           L2INPUT_FEAT_ARP_TERM);
2899             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2900                                              to_next, n_left_to_next,
2901                                              pi0, next0);
2902             continue;
2903           }
2904
2905         drop:
2906           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2907               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2908                arp0->ip4_over_ethernet[1].ip4.as_u32))
2909             {
2910               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2911             }
2912           next0 = ARP_TERM_NEXT_DROP;
2913           p0->error = node->errors[error0];
2914
2915           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2916                                            to_next, n_left_to_next, pi0,
2917                                            next0);
2918         }
2919
2920       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2921     }
2922
2923   vlib_error_count (vm, node->node_index,
2924                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2925   return frame->n_vectors;
2926 }
2927
2928 /* *INDENT-OFF* */
2929 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2930   .function = arp_term_l2bd,
2931   .name = "arp-term-l2bd",
2932   .vector_size = sizeof (u32),
2933   .n_errors = ETHERNET_ARP_N_ERROR,
2934   .error_strings = ethernet_arp_error_strings,
2935   .n_next_nodes = ARP_TERM_N_NEXT,
2936   .next_nodes = {
2937     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2938     [ARP_TERM_NEXT_DROP] = "error-drop",
2939   },
2940   .format_buffer = format_ethernet_arp_header,
2941   .format_trace = format_arp_term_input_trace,
2942 };
2943 /* *INDENT-ON* */
2944
2945 clib_error_t *
2946 arp_term_init (vlib_main_t * vm)
2947 {
2948   // Initialize the feature next-node indexes
2949   feat_bitmap_init_next_nodes (vm,
2950                                arp_term_l2bd_node.index,
2951                                L2INPUT_N_FEAT,
2952                                l2input_get_feat_names (),
2953                                arp_term_next_node_index);
2954   return 0;
2955 }
2956
2957 VLIB_INIT_FUNCTION (arp_term_init);
2958
2959 void
2960 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2961 {
2962   if (e->sw_if_index == sw_if_index)
2963     {
2964       adj_nbr_walk_nh4 (e->sw_if_index,
2965                         &e->ip4_address, arp_mk_complete_walk, e);
2966     }
2967 }
2968
2969 void
2970 ethernet_arp_change_mac (u32 sw_if_index)
2971 {
2972   ethernet_arp_main_t *am = &ethernet_arp_main;
2973   ethernet_arp_ip4_entry_t *e;
2974   adj_index_t ai;
2975
2976   /* *INDENT-OFF* */
2977   pool_foreach (e, am->ip4_entry_pool,
2978   ({
2979     change_arp_mac (sw_if_index, e);
2980   }));
2981   /* *INDENT-ON* */
2982
2983   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
2984
2985   if (ADJ_INDEX_INVALID != ai)
2986     adj_glean_update_rewrite (ai);
2987 }
2988
2989 void
2990 send_ip4_garp (vlib_main_t * vm, u32 sw_if_index)
2991 {
2992   ip4_main_t *i4m = &ip4_main;
2993   ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
2994
2995   send_ip4_garp_w_addr (vm, ip4_addr, sw_if_index);
2996 }
2997
2998 void
2999 send_ip4_garp_w_addr (vlib_main_t * vm,
3000                       const ip4_address_t * ip4_addr, u32 sw_if_index)
3001 {
3002   ip4_main_t *i4m = &ip4_main;
3003   vnet_main_t *vnm = vnet_get_main ();
3004   u8 *rewrite, rewrite_len;
3005   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
3006
3007   if (ip4_addr)
3008     {
3009       clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
3010                     format_ip4_address, ip4_addr, sw_if_index);
3011
3012       /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
3013          where the interface IP/MAC pair is used for both source and request
3014          MAC/IP pairs in the request */
3015       u32 bi = 0;
3016       ethernet_arp_header_t *h = vlib_packet_template_get_packet
3017         (vm, &i4m->ip4_arp_request_packet_template, &bi);
3018
3019       if (!h)
3020         return;
3021
3022       mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
3023       mac_address_from_bytes (&h->ip4_over_ethernet[1].mac, hi->hw_address);
3024       h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
3025       h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
3026
3027       /* Setup MAC header with ARP Etype and broadcast DMAC */
3028       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
3029       rewrite =
3030         ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_ARP,
3031                                 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
3032       rewrite_len = vec_len (rewrite);
3033       vlib_buffer_advance (b, -rewrite_len);
3034       ethernet_header_t *e = vlib_buffer_get_current (b);
3035       clib_memcpy_fast (e->dst_address, rewrite, rewrite_len);
3036       vec_free (rewrite);
3037
3038       /* Send GARP packet out the specified interface */
3039       vnet_buffer (b)->sw_if_index[VLIB_RX] =
3040         vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
3041       vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
3042       u32 *to_next = vlib_frame_vector_args (f);
3043       to_next[0] = bi;
3044       f->n_vectors = 1;
3045       vlib_put_frame_to_node (vm, hi->output_node_index, f);
3046     }
3047 }
3048
3049 /*
3050  * Remove any arp entries associated with the specified interface
3051  */
3052 static clib_error_t *
3053 vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
3054 {
3055   ethernet_arp_main_t *am = &ethernet_arp_main;
3056
3057   if (!is_add && sw_if_index != ~0)
3058     {
3059       ethernet_arp_ip4_entry_t *e;
3060       /* *INDENT-OFF* */
3061       pool_foreach (e, am->ip4_entry_pool, ({
3062         if (e->sw_if_index != sw_if_index)
3063           continue;
3064         vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
3065           .sw_if_index = sw_if_index,
3066           .ip4 = e->ip4_address,
3067         };
3068         vnet_arp_unset_ip4_over_ethernet_internal (vnm, &args);
3069       }));
3070       /* *INDENT-ON* */
3071       arp_disable (am, sw_if_index);
3072     }
3073   else if (is_add)
3074     {
3075       vnet_feature_enable_disable ("arp", "arp-disabled",
3076                                    sw_if_index, 1, NULL, 0);
3077     }
3078
3079   return (NULL);
3080 }
3081
3082 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_delete_sw_interface);
3083
3084 /*
3085  * fd.io coding-style-patch-verification: ON
3086  *
3087  * Local Variables:
3088  * eval: (c-set-style "gnu")
3089  * End:
3090  */