ARP: add feature arc
[vpp.git] / src / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip_neighbor.h>
20 #include <vnet/ip/ip6.h>
21 #include <vnet/ethernet/ethernet.h>
22 #include <vnet/ethernet/arp.h>
23 #include <vnet/l2/l2_input.h>
24 #include <vppinfra/mhash.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/fib/fib_entry_src.h>
27 #include <vnet/adj/adj_nbr.h>
28 #include <vnet/adj/adj_mcast.h>
29 #include <vnet/mpls/mpls.h>
30 #include <vnet/l2/feat_bitmap.h>
31
32 #include <vlibmemory/api.h>
33
34 /**
35  * @file
36  * @brief IPv4 ARP.
37  *
38  * This file contains code to manage the IPv4 ARP tables (IP Address
39  * to MAC Address lookup).
40  */
41
42
43 /**
44  * @brief Per-interface ARP configuration and state
45  */
46 typedef struct ethernet_arp_interface_t_
47 {
48   /**
49    * Hash table of ARP entries.
50    * Since this hash table is per-interface, the key is only the IPv4 address
       * (as_u32). The value is the entry's index in
       * ethernet_arp_main.ip4_entry_pool. NULL until the first entry is added.
51    */
52   uword *arp_entries;
53   /**
54    * Is ARP enabled on this interface.
       * Set to 1 by arp_enable() and cleared by arp_disable().
55    */
56   u32 enabled;
57   /**
58    * Is Proxy ARP enabled on this interface
59    */
60   u32 proxy_enabled;
61 } ethernet_arp_interface_t;
62
/**
 * @brief One proxy-ARP configuration record.
 *
 * NOTE(review): lo_addr/hi_addr presumably bound the range of addresses
 * that are proxied within the table identified by fib_index; the code that
 * consumes this type is outside this view - confirm.
 */
63 typedef struct
64 {
65   ip4_address_t lo_addr;
66   ip4_address_t hi_addr;
67   u32 fib_index;
68 } ethernet_proxy_arp_t;
69
/**
 * @brief A registration waiting on an IPv4 address.
 *
 * Used both for one-shot "address resolved" notifications (the
 * pending_resolutions pool) and for persistent MAC change notifications
 * (the mac_changes pool). Registrations for the same address are chained
 * into a singly linked list through next_index.
 */
70 typedef struct
71 {
72   u32 next_index;		/* pool index of the next registration, ~0 ends the list */
73   uword node_index;		/* process node signalled via vlib_process_signal_event() */
74   uword type_opaque;		/* event type passed to the signalled process */
75   uword data;			/* event data passed to the signalled process / callback */
76   /* Used for arp event notification only */
77   arp_change_event_cb_t data_callback;
78   u32 pid;			/* with node_index and type_opaque, identifies a change registration */
79 } pending_resolution_t;
80
/**
 * @brief Global (not per-interface) ARP state.
 */
81 typedef struct
82 {
83   /* Hash tables mapping name to opcode. */
84   uword *opcode_by_name;
85 
86   /* One-shot resolution waiters: hash of IPv4 address (as_u32) to the
      * head index of a list in the pending_resolutions pool. */
87   uword *pending_resolutions_by_address;
88   pending_resolution_t *pending_resolutions;
89 
90   /* Mac address change notification: hash of IPv4 address (as_u32) to the
      * head index of a list in the mac_changes pool. */
91   uword *mac_changes_by_address;
92   pending_resolution_t *mac_changes;
93 
      /* Pool of all ARP entries; per-interface hashes store indices into it. */
94   ethernet_arp_ip4_entry_t *ip4_entry_pool;
95 
96   /* ARP attack mitigation: when limit_arp_cache_size is non-zero and the
      * pool has reached it, a non-static entry is recycled, starting the
      * search after arp_delete_rotor. */
97   u32 arp_delete_rotor;
98   u32 limit_arp_cache_size;
99 
100   /** Per interface state, a vector indexed by sw_if_index */
101   ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
102 
103   /* Proxy arp vector */
104   ethernet_proxy_arp_t *proxy_arps;
105 
106   uword wc_ip4_arp_publisher_node;
107   uword wc_ip4_arp_publisher_et;
108 
109   /* ARP feature arc index */
110   u8 feature_arc_index;
111 } ethernet_arp_main_t;
112
/* The single, file-scope instance of the ARP state used by every function
 * in this file. */
113 static ethernet_arp_main_t ethernet_arp_main;
114
/**
 * @brief Arguments describing one ARP table operation (set / remove /
 * flush / ...) on the binding of an IPv4 address to a MAC address on an
 * interface.
 */
115 typedef struct
116 {
117   u32 sw_if_index;		/* interface the entry belongs to */
118   ip4_address_t ip4;		/* IPv4 address of the neighbour */
119   mac_address_t mac;		/* MAC address of the neighbour */
120   ip_neighbor_flags_t nbr_flags;	/* IP_NEIGHBOR_FLAG_* requested for the entry */
121   u32 flags;			/* ETHERNET_ARP_ARGS_* operation selector, below */
122 #define ETHERNET_ARP_ARGS_REMOVE (1<<0)
123 #define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
124 #define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
125 #define ETHERNET_ARP_ARGS_WC_PUB  (1<<3)
126 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
127
/* Leading five octets 00:00:5E:00:01; this matches the IPv4 VRRP virtual
 * router MAC prefix (RFC 5798). Its users are outside this view. */
128 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
129 
130 /* Node index for send_garp_na_process */
131 u32 send_garp_na_process_node_index;
132 
/* Forward declaration; the definition is outside this view. */
133 static void
134 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
135                                     * a);
136
137 static u8 *
138 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
139 {
140   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
141   char *t = 0;
142   switch (h)
143     {
144 #define _(n,f) case n: t = #f; break;
145       foreach_ethernet_arp_hardware_type;
146 #undef _
147
148     default:
149       return format (s, "unknown 0x%x", h);
150     }
151
152   return format (s, "%s", t);
153 }
154
155 static u8 *
156 format_ethernet_arp_opcode (u8 * s, va_list * va)
157 {
158   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
159   char *t = 0;
160   switch (o)
161     {
162 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
163       foreach_ethernet_arp_opcode;
164 #undef _
165
166     default:
167       return format (s, "unknown 0x%x", o);
168     }
169
170   return format (s, "%s", t);
171 }
172
173 static uword
174 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
175                                               va_list * args)
176 {
177   int *result = va_arg (*args, int *);
178   ethernet_arp_main_t *am = &ethernet_arp_main;
179   int x, i;
180
181   /* Numeric opcode. */
182   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
183     {
184       if (x >= (1 << 16))
185         return 0;
186       *result = x;
187       return 1;
188     }
189
190   /* Named type. */
191   if (unformat_user (input, unformat_vlib_number_by_name,
192                      am->opcode_by_name, &i))
193     {
194       *result = i;
195       return 1;
196     }
197
198   return 0;
199 }
200
201 static uword
202 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
203                                              va_list * args)
204 {
205   int *result = va_arg (*args, int *);
206   if (!unformat_user
207       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
208     return 0;
209
210   *result = clib_host_to_net_u16 ((u16) * result);
211   return 1;
212 }
213
214 static u8 *
215 format_ethernet_arp_header (u8 * s, va_list * va)
216 {
217   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
218   u32 max_header_bytes = va_arg (*va, u32);
219   u32 indent;
220   u16 l2_type, l3_type;
221
222   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
223     return format (s, "ARP header truncated");
224
225   l2_type = clib_net_to_host_u16 (a->l2_type);
226   l3_type = clib_net_to_host_u16 (a->l3_type);
227
228   indent = format_get_indent (s);
229
230   s = format (s, "%U, type %U/%U, address size %d/%d",
231               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
232               format_ethernet_arp_hardware_type, l2_type,
233               format_ethernet_type, l3_type,
234               a->n_l2_address_bytes, a->n_l3_address_bytes);
235
236   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
237       && l3_type == ETHERNET_TYPE_IP4)
238     {
239       s = format (s, "\n%U%U/%U -> %U/%U",
240                   format_white_space, indent,
241                   format_mac_address_t, &a->ip4_over_ethernet[0].mac,
242                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
243                   format_mac_address_t, &a->ip4_over_ethernet[1].mac,
244                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
245     }
246   else
247     {
248       uword n2 = a->n_l2_address_bytes;
249       uword n3 = a->n_l3_address_bytes;
250       s = format (s, "\n%U%U/%U -> %U/%U",
251                   format_white_space, indent,
252                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
253                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
254                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
255                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
256     }
257
258   return s;
259 }
260
261 u8 *
262 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
263 {
264   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
265   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
266   vnet_sw_interface_t *si;
267   u8 *flags = 0;
268
269   if (!e)
270     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
271                    "Flags", "Ethernet", "Interface");
272
273   si = vnet_get_sw_interface (vnm, e->sw_if_index);
274
275   if (e->flags & IP_NEIGHBOR_FLAG_STATIC)
276     flags = format (flags, "S");
277
278   if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC)
279     flags = format (flags, "D");
280
281   if (e->flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY)
282     flags = format (flags, "N");
283
284   s = format (s, "%=12U%=16U%=6s%=20U%U",
285               format_vlib_time, vnm->vlib_main, e->time_last_updated,
286               format_ip4_address, &e->ip4_address,
287               flags ? (char *) flags : "",
288               format_mac_address_t, &e->mac,
289               format_vnet_sw_interface_name, vnm, si);
290
291   vec_free (flags);
292   return s;
293 }
294
/* Packet trace record: a raw copy of up to 64 bytes of packet data, later
 * decoded by the trace formatters below. */
295 typedef struct
296 {
297   u8 packet_data[64];
298 } ethernet_arp_input_trace_t;
299
300 static u8 *
301 format_ethernet_arp_input_trace (u8 * s, va_list * va)
302 {
303   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
304   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
305   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
306
307   s = format (s, "%U",
308               format_ethernet_arp_header,
309               t->packet_data, sizeof (t->packet_data));
310
311   return s;
312 }
313
314 static u8 *
315 format_arp_term_input_trace (u8 * s, va_list * va)
316 {
317   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
318   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
319   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
320
321   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
322      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
323      - for ip6, the first nibble has value of 6. */
324   s = format (s, "%U", t->packet_data[0] == 0 ?
325               format_ethernet_arp_header : format_ip6_header,
326               t->packet_data, sizeof (t->packet_data));
327
328   return s;
329 }
330
331 static void
332 arp_nbr_probe (ip_adjacency_t * adj)
333 {
334   vnet_main_t *vnm = vnet_get_main ();
335   ip4_main_t *im = &ip4_main;
336   ip_interface_address_t *ia;
337   ethernet_arp_header_t *h;
338   vnet_hw_interface_t *hi;
339   vnet_sw_interface_t *si;
340   ip4_address_t *src;
341   vlib_buffer_t *b;
342   vlib_main_t *vm;
343   u32 bi = 0;
344
345   vm = vlib_get_main ();
346
347   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
348
349   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
350     {
351       return;
352     }
353
354   src =
355     ip4_interface_address_matching_destination (im,
356                                                 &adj->sub_type.nbr.next_hop.
357                                                 ip4,
358                                                 adj->rewrite_header.
359                                                 sw_if_index, &ia);
360   if (!src)
361     {
362       return;
363     }
364
365   h =
366     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
367                                      &bi);
368   if (!h)
369     return;
370
371   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
372
373   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
374
375   h->ip4_over_ethernet[0].ip4 = src[0];
376   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
377
378   b = vlib_get_buffer (vm, bi);
379   vnet_buffer (b)->sw_if_index[VLIB_RX] =
380     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
381
382   /* Add encapsulation string for software interface (e.g. ethernet header). */
383   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
384   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
385
386   {
387     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
388     u32 *to_next = vlib_frame_vector_args (f);
389     to_next[0] = bi;
390     f->n_vectors = 1;
391     vlib_put_frame_to_node (vm, hi->output_node_index, f);
392   }
393 }
394
395 static void
396 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
397 {
398   adj_nbr_update_rewrite
399     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
400      ethernet_build_rewrite (vnet_get_main (),
401                              e->sw_if_index,
402                              adj_get_link_type (ai), &e->mac));
403 }
404
405 static void
406 arp_mk_incomplete (adj_index_t ai)
407 {
408   ip_adjacency_t *adj = adj_get (ai);
409
410   adj_nbr_update_rewrite
411     (ai,
412      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
413      ethernet_build_rewrite (vnet_get_main (),
414                              adj->rewrite_header.sw_if_index,
415                              VNET_LINK_ARP,
416                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
417 }
418
419 static ethernet_arp_ip4_entry_t *
420 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
421 {
422   ethernet_arp_main_t *am = &ethernet_arp_main;
423   ethernet_arp_ip4_entry_t *e = NULL;
424   uword *p;
425
426   if (NULL != eai->arp_entries)
427     {
428       p = hash_get (eai->arp_entries, addr->as_u32);
429       if (!p)
430         return (NULL);
431
432       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
433     }
434
435   return (e);
436 }
437
438 static adj_walk_rc_t
439 arp_mk_complete_walk (adj_index_t ai, void *ctx)
440 {
441   ethernet_arp_ip4_entry_t *e = ctx;
442
443   arp_mk_complete (ai, e);
444
445   return (ADJ_WALK_RC_CONTINUE);
446 }
447
448 static adj_walk_rc_t
449 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
450 {
451   arp_mk_incomplete (ai);
452
453   return (ADJ_WALK_RC_CONTINUE);
454 }
455
456 static int
457 arp_is_enabled (ethernet_arp_main_t * am, u32 sw_if_index)
458 {
459   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
460     return 0;
461
462   return (am->ethernet_arp_by_sw_if_index[sw_if_index].enabled);
463 }
464
465 static void
466 arp_enable (ethernet_arp_main_t * am, u32 sw_if_index)
467 {
468   if (arp_is_enabled (am, sw_if_index))
469     return;
470
471   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
472
473   am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 1;
474
475   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 1, NULL, 0);
476 }
477
/* Forward declaration, needed by arp_disable() below; the definition is
 * outside this view. */
478 static int
479 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
480                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
481                                            * args);
482
483 static void
484 arp_disable (ethernet_arp_main_t * am, u32 sw_if_index)
485 {
486   ethernet_arp_interface_t *eai;
487   ethernet_arp_ip4_entry_t *e;
488   u32 i, *to_delete = 0;
489   hash_pair_t *pair;
490
491   if (!arp_is_enabled (am, sw_if_index))
492     return;
493
494   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 0, NULL, 0);
495
496   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
497
498
499   /* *INDENT-OFF* */
500   hash_foreach_pair (pair, eai->arp_entries,
501   ({
502     e = pool_elt_at_index(am->ip4_entry_pool,
503                           pair->value[0]);
504     vec_add1 (to_delete, e - am->ip4_entry_pool);
505   }));
506   /* *INDENT-ON* */
507
508   for (i = 0; i < vec_len (to_delete); i++)
509     {
510       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
511
512       vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
513         .ip4.as_u32 = e->ip4_address.as_u32,
514         .sw_if_index = e->sw_if_index,
515         .flags = ETHERNET_ARP_ARGS_FLUSH,
516       };
517       mac_address_copy (&delme.mac, &e->mac);
518
519       vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (), &delme);
520     }
521
522   vec_free (to_delete);
523
524   eai->enabled = 0;
525 }
526
527 void
528 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
529 {
530   ethernet_arp_main_t *am = &ethernet_arp_main;
531   ethernet_arp_interface_t *arp_int;
532   ethernet_arp_ip4_entry_t *e;
533   ip_adjacency_t *adj;
534
535   adj = adj_get (ai);
536
537   arp_enable (am, sw_if_index);
538   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
539   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
540
541   switch (adj->lookup_next_index)
542     {
543     case IP_LOOKUP_NEXT_GLEAN:
544       adj_glean_update_rewrite (ai);
545       break;
546     case IP_LOOKUP_NEXT_ARP:
547       if (NULL != e)
548         {
549           adj_nbr_walk_nh4 (sw_if_index,
550                             &e->ip4_address, arp_mk_complete_walk, e);
551         }
552       else
553         {
554           /*
555            * no matching ARP entry.
556            * construct the rewrite required to for an ARP packet, and stick
557            * that in the adj's pipe to smoke.
558            */
559           adj_nbr_update_rewrite
560             (ai,
561              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
562              ethernet_build_rewrite
563              (vnm,
564               sw_if_index,
565               VNET_LINK_ARP,
566               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
567
568           /*
569            * since the FIB has added this adj for a route, it makes sense it
570            * may want to forward traffic sometime soon. Let's send a
571            * speculative ARP. just one. If we were to do periodically that
572            * wouldn't be bad either, but that's more code than i'm prepared to
573            * write at this time for relatively little reward.
574            */
575           arp_nbr_probe (adj);
576         }
577       break;
578     case IP_LOOKUP_NEXT_BCAST:
579       adj_nbr_update_rewrite (ai,
580                               ADJ_NBR_REWRITE_FLAG_COMPLETE,
581                               ethernet_build_rewrite
582                               (vnm,
583                                sw_if_index,
584                                VNET_LINK_IP4,
585                                VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
586       break;
587     case IP_LOOKUP_NEXT_MCAST:
588       {
589         /*
590          * Construct a partial rewrite from the known ethernet mcast dest MAC
591          */
592         u8 *rewrite;
593         u8 offset;
594
595         rewrite = ethernet_build_rewrite (vnm,
596                                           sw_if_index,
597                                           adj->ia_link,
598                                           ethernet_ip4_mcast_dst_addr ());
599         offset = vec_len (rewrite) - 2;
600
601         /*
602          * Complete the remaining fields of the adj's rewrite to direct the
603          * complete of the rewrite at switch time by copying in the IP
604          * dst address's bytes.
605          * Offset is 2 bytes into the MAC destination address.
606          */
607         adj_mcast_update_rewrite (ai, rewrite, offset);
608
609         break;
610       }
611     case IP_LOOKUP_NEXT_DROP:
612     case IP_LOOKUP_NEXT_PUNT:
613     case IP_LOOKUP_NEXT_LOCAL:
614     case IP_LOOKUP_NEXT_REWRITE:
615     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
616     case IP_LOOKUP_NEXT_MIDCHAIN:
617     case IP_LOOKUP_NEXT_ICMP_ERROR:
618     case IP_LOOKUP_N_NEXT:
619       ASSERT (0);
620       break;
621     }
622 }
623
624 static void
625 arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
626 {
627   fib_prefix_t pfx = {
628     .fp_len = 32,
629     .fp_proto = FIB_PROTOCOL_IP4,
630     .fp_addr.ip4 = e->ip4_address,
631   };
632
633   e->fib_entry_index =
634     fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
635                               FIB_ENTRY_FLAG_ATTACHED,
636                               DPO_PROTO_IP4, &pfx.fp_addr,
637                               e->sw_if_index, ~0, 1, NULL,
638                               FIB_ROUTE_PATH_FLAG_NONE);
639   fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
640 }
641
642 static void
643 arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
644 {
645   if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
646     {
647       fib_prefix_t pfx = {
648         .fp_len = 32,
649         .fp_proto = FIB_PROTOCOL_IP4,
650         .fp_addr.ip4 = e->ip4_address,
651       };
652       u32 fib_index;
653
654       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
655
656       fib_table_entry_path_remove (fib_index, &pfx,
657                                    FIB_SOURCE_ADJ,
658                                    DPO_PROTO_IP4,
659                                    &pfx.fp_addr,
660                                    e->sw_if_index, ~0, 1,
661                                    FIB_ROUTE_PATH_FLAG_NONE);
662       fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
663     }
664 }
665
666 static ethernet_arp_ip4_entry_t *
667 force_reuse_arp_entry (void)
668 {
669   ethernet_arp_ip4_entry_t *e;
670   ethernet_arp_main_t *am = &ethernet_arp_main;
671   u32 count = 0;
672   u32 index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
673   if (index == ~0)              /* Try again from elt 0 */
674     index = pool_next_index (am->ip4_entry_pool, index);
675
676   /* Find a non-static random entry to free up for reuse */
677   do
678     {
679       if ((count++ == 100) || (index == ~0))
680         return NULL;            /* give up after 100 entries */
681       e = pool_elt_at_index (am->ip4_entry_pool, index);
682       am->arp_delete_rotor = index;
683       index = pool_next_index (am->ip4_entry_pool, index);
684     }
685   while (e->flags & IP_NEIGHBOR_FLAG_STATIC);
686
687   /* Remove ARP entry from its interface and update fib */
688   hash_unset
689     (am->ethernet_arp_by_sw_if_index[e->sw_if_index].arp_entries,
690      e->ip4_address.as_u32);
691   arp_adj_fib_remove
692     (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
693   adj_nbr_walk_nh4 (e->sw_if_index,
694                     &e->ip4_address, arp_mk_incomplete_walk, e);
695   return e;
696 }
697
698 static int
699 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
700                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
701                                          * args)
702 {
703   ethernet_arp_ip4_entry_t *e = 0;
704   ethernet_arp_main_t *am = &ethernet_arp_main;
705   vlib_main_t *vm = vlib_get_main ();
706   int make_new_arp_cache_entry = 1;
707   uword *p;
708   pending_resolution_t *pr, *mc;
709   ethernet_arp_interface_t *arp_int;
710   u32 sw_if_index = args->sw_if_index;
711
712   arp_enable (am, sw_if_index);
713
714   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
715
716   if (NULL != arp_int->arp_entries)
717     {
718       p = hash_get (arp_int->arp_entries, args->ip4.as_u32);
719       if (p)
720         {
721           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
722
723           /* Refuse to over-write static arp. */
724           if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC) &&
725               (e->flags & IP_NEIGHBOR_FLAG_STATIC))
726             {
727               /* if MAC address match, still check to send event */
728               if (mac_address_equal (&e->mac, &args->mac))
729                 goto check_customers;
730               return -2;
731             }
732           make_new_arp_cache_entry = 0;
733         }
734     }
735
736   if (make_new_arp_cache_entry)
737     {
738       if (am->limit_arp_cache_size &&
739           pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
740         {
741           e = force_reuse_arp_entry ();
742           if (NULL == e)
743             return -2;
744         }
745       else
746         pool_get (am->ip4_entry_pool, e);
747
748       if (NULL == arp_int->arp_entries)
749         arp_int->arp_entries = hash_create (0, sizeof (u32));
750
751       hash_set (arp_int->arp_entries, args->ip4.as_u32,
752                 e - am->ip4_entry_pool);
753
754       e->sw_if_index = sw_if_index;
755       e->ip4_address = args->ip4;
756       e->fib_entry_index = FIB_NODE_INDEX_INVALID;
757       mac_address_copy (&e->mac, &args->mac);
758
759       if (!(args->nbr_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
760         {
761           arp_adj_fib_add (e,
762                            ip4_fib_table_get_index_for_sw_if_index
763                            (e->sw_if_index));
764         }
765       else
766         {
767           e->flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
768         }
769     }
770   else
771     {
772       /*
773        * prevent a DoS attack from the data-plane that
774        * spams us with no-op updates to the MAC address
775        */
776       if (mac_address_equal (&e->mac, &args->mac))
777         {
778           e->time_last_updated = vlib_time_now (vm);
779           goto check_customers;
780         }
781
782       /* Update ethernet address. */
783       mac_address_copy (&e->mac, &args->mac);
784     }
785
786   /* Update time stamp and flags. */
787   e->time_last_updated = vlib_time_now (vm);
788   if (args->nbr_flags & IP_NEIGHBOR_FLAG_STATIC)
789     {
790       e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
791       e->flags |= IP_NEIGHBOR_FLAG_STATIC;
792     }
793   else
794     {
795       e->flags &= ~IP_NEIGHBOR_FLAG_STATIC;
796       e->flags |= IP_NEIGHBOR_FLAG_DYNAMIC;
797     }
798
799   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
800
801 check_customers:
802   /* Customer(s) waiting for this address to be resolved? */
803   p = hash_get (am->pending_resolutions_by_address, args->ip4.as_u32);
804   if (p)
805     {
806       u32 next_index;
807       next_index = p[0];
808
809       while (next_index != (u32) ~ 0)
810         {
811           pr = pool_elt_at_index (am->pending_resolutions, next_index);
812           vlib_process_signal_event (vm, pr->node_index,
813                                      pr->type_opaque, pr->data);
814           next_index = pr->next_index;
815           pool_put (am->pending_resolutions, pr);
816         }
817
818       hash_unset (am->pending_resolutions_by_address, args->ip4.as_u32);
819     }
820
821   /* Customer(s) requesting ARP event for this address? */
822   p = hash_get (am->mac_changes_by_address, args->ip4.as_u32);
823   if (p)
824     {
825       u32 next_index;
826       next_index = p[0];
827
828       while (next_index != (u32) ~ 0)
829         {
830           int rv = 1;
831           mc = pool_elt_at_index (am->mac_changes, next_index);
832
833           /* Call the user's data callback, return 1 to suppress dup events */
834           if (mc->data_callback)
835             rv = (mc->data_callback) (mc->data, &args->mac, sw_if_index, 0);
836
837           /*
838            * Signal the resolver process, as long as the user
839            * says they want to be notified
840            */
841           if (rv == 0)
842             vlib_process_signal_event (vm, mc->node_index,
843                                        mc->type_opaque, mc->data);
844           next_index = mc->next_index;
845         }
846     }
847
848   return 0;
849 }
850
851 void
852 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
853                                         void *address_arg,
854                                         uword node_index,
855                                         uword type_opaque, uword data)
856 {
857   ethernet_arp_main_t *am = &ethernet_arp_main;
858   ip4_address_t *address = address_arg;
859   uword *p;
860   pending_resolution_t *pr;
861
862   pool_get (am->pending_resolutions, pr);
863
864   pr->next_index = ~0;
865   pr->node_index = node_index;
866   pr->type_opaque = type_opaque;
867   pr->data = data;
868   pr->data_callback = 0;
869
870   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
871   if (p)
872     {
873       /* Insert new resolution at the head of the list */
874       pr->next_index = p[0];
875       hash_unset (am->pending_resolutions_by_address, address->as_u32);
876     }
877
878   hash_set (am->pending_resolutions_by_address, address->as_u32,
879             pr - am->pending_resolutions);
880 }
881
882 int
883 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
884                                    arp_change_event_cb_t data_callback,
885                                    u32 pid,
886                                    void *address_arg,
887                                    uword node_index,
888                                    uword type_opaque, uword data, int is_add)
889 {
890   ethernet_arp_main_t *am = &ethernet_arp_main;
891   ip4_address_t *address = address_arg;
892
893   /* Try to find an existing entry */
894   u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32);
895   u32 *p = first;
896   pending_resolution_t *mc;
897   while (p && *p != ~0)
898     {
899       mc = pool_elt_at_index (am->mac_changes, *p);
900       if (mc->node_index == node_index && mc->type_opaque == type_opaque
901           && mc->pid == pid)
902         break;
903       p = &mc->next_index;
904     }
905
906   int found = p && *p != ~0;
907   if (is_add)
908     {
909       if (found)
910         return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
911
912       pool_get (am->mac_changes, mc);
913       /* *INDENT-OFF* */
914       *mc = (pending_resolution_t)
915       {
916         .next_index = ~0,
917         .node_index = node_index,
918         .type_opaque = type_opaque,
919         .data = data,
920         .data_callback = data_callback,
921         .pid = pid,
922       };
923       /* *INDENT-ON* */
924
925       /* Insert new resolution at the end of the list */
926       u32 new_idx = mc - am->mac_changes;
927       if (p)
928         p[0] = new_idx;
929       else
930         hash_set (am->mac_changes_by_address, address->as_u32, new_idx);
931     }
932   else
933     {
934       if (!found)
935         return VNET_API_ERROR_NO_SUCH_ENTRY;
936
937       /* Clients may need to clean up pool entries, too */
938       if (data_callback)
939         /* no new mac addrs */
940         (data_callback) (mc->data, NULL, ~0, NULL);
941
942       /* Remove the entry from the list and delete the entry */
943       *p = mc->next_index;
944       pool_put (am->mac_changes, mc);
945
946       /* Remove from hash if we deleted the last entry */
947       if (*p == ~0 && p == first)
948         hash_unset (am->mac_changes_by_address, address->as_u32);
949     }
950   return 0;
951 }
952
953 /* Either we drop the packet or we send a reply to the sender. */
954 typedef enum
955 {
956   ARP_REPLY_NEXT_DROP,		/* drop the packet */
957   ARP_REPLY_NEXT_REPLY_TX,	/* send a reply to the sender */
958   ARP_REPLY_N_NEXT,		/* number of next nodes, not a next itself */
959 } arp_reply_next_t;
960
/*
 * X-macro list of ARP counters: _(symbol, description).
 * It is expanded below to build the ETHERNET_ARP_ERROR_<symbol> enum.
 */
961 #define foreach_ethernet_arp_error                                      \
962   _ (replies_sent, "ARP replies sent")                                  \
963   _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
964   _ (l3_type_not_ip4, "L3 type not IP4")                                \
965   _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
966   _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
967   _ (l3_dst_address_unset, "IP4 destination address is unset")          \
968   _ (l3_src_address_is_local, "IP4 source address matches local interface") \
969   _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
970   _ (replies_received, "ARP replies received")                          \
971   _ (opcode_not_request, "ARP opcode not request")                      \
972   _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
973   _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
974   _ (gratuitous_arp, "ARP probe or announcement dropped") \
975   _ (interface_no_table, "Interface is not mapped to an IP table") \
976   _ (interface_not_ip_enabled, "Interface is not IP enabled") \
977   _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \
978
979 typedef enum
980 {
981 #define _(sym,string) ETHERNET_ARP_ERROR_##sym,
982   foreach_ethernet_arp_error
983 #undef _
984     ETHERNET_ARP_N_ERROR,
985 } ethernet_arp_reply_error_t;
986
987 static int
988 arp_unnumbered (vlib_buffer_t * p0,
989                 u32 input_sw_if_index, u32 conn_sw_if_index)
990 {
991   vnet_main_t *vnm = vnet_get_main ();
992   vnet_interface_main_t *vim = &vnm->interface_main;
993   vnet_sw_interface_t *si;
994
995   /* verify that the input interface is unnumbered to the connected.
996    * the connected interface is the interface on which the subnet is
997    * configured */
998   si = &vim->sw_interfaces[input_sw_if_index];
999
1000   if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
1001         (si->unnumbered_sw_if_index == conn_sw_if_index)))
1002     {
1003       /* the input interface is not unnumbered to the interface on which
1004        * the sub-net is configured that covers the ARP request.
1005        * So this is not the case for unnumbered.. */
1006       return 0;
1007     }
1008
1009   return !0;
1010 }
1011
1012 static u32
1013 arp_learn (vnet_main_t * vnm,
1014            ethernet_arp_main_t * am, u32 sw_if_index,
1015            const ethernet_arp_ip4_over_ethernet_address_t * addr)
1016 {
1017   vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0);
1018   return (ETHERNET_ARP_ERROR_l3_src_address_learned);
1019 }
1020
/**
 * @brief Statically registered next-node indices of the arp-input node.
 */
typedef enum arp_input_next_t_
{
  ARP_INPUT_NEXT_DROP = 0,      /* discard the packet */
  ARP_INPUT_N_NEXT = 1,         /* number of next indices */
} arp_input_next_t;
1026
1027 static uword
1028 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1029 {
1030   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
1031   ethernet_arp_main_t *am = &ethernet_arp_main;
1032
1033   from = vlib_frame_vector_args (frame);
1034   n_left_from = frame->n_vectors;
1035   next_index = node->cached_next_index;
1036
1037   if (node->flags & VLIB_NODE_FLAG_TRACE)
1038     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1039                                    /* stride */ 1,
1040                                    sizeof (ethernet_arp_input_trace_t));
1041
1042   while (n_left_from > 0)
1043     {
1044       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1045
1046       while (n_left_from > 0 && n_left_to_next > 0)
1047         {
1048           const ethernet_arp_header_t *arp0;
1049           arp_input_next_t next0;
1050           vlib_buffer_t *p0;
1051           u32 pi0, error0;
1052
1053           pi0 = to_next[0] = from[0];
1054           from += 1;
1055           to_next += 1;
1056           n_left_from -= 1;
1057           n_left_to_next -= 1;
1058
1059           p0 = vlib_get_buffer (vm, pi0);
1060           arp0 = vlib_buffer_get_current (p0);
1061
1062           error0 = ETHERNET_ARP_ERROR_replies_sent;
1063           next0 = ARP_INPUT_NEXT_DROP;
1064
1065           error0 =
1066             (arp0->l2_type !=
1067              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1068              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1069           error0 =
1070             (arp0->l3_type !=
1071              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1072              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1073           error0 =
1074             (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
1075              ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
1076
1077           if (ETHERNET_ARP_ERROR_replies_sent == error0)
1078             vnet_feature_arc_start (am->feature_arc_index,
1079                                     vnet_buffer (p0)->sw_if_index[VLIB_RX],
1080                                     &next0, p0);
1081           else
1082             p0->error = node->errors[error0];
1083
1084           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1085                                            n_left_to_next, pi0, next0);
1086         }
1087
1088       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1089     }
1090
1091   return frame->n_vectors;
1092 }
1093
1094 static_always_inline u32
1095 arp_mk_reply (vnet_main_t * vnm,
1096               vlib_buffer_t * p0,
1097               u32 sw_if_index0,
1098               const ip4_address_t * if_addr0,
1099               ethernet_arp_header_t * arp0, ethernet_header_t * eth_rx)
1100 {
1101   vnet_hw_interface_t *hw_if0;
1102   u8 *rewrite0, rewrite0_len;
1103   ethernet_header_t *eth_tx;
1104   u32 next0;
1105
1106   /* Send a reply.
1107      An adjacency to the sender is not always present,
1108      so we use the interface to build us a rewrite string
1109      which will contain all the necessary tags. */
1110   rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
1111                                      VNET_LINK_ARP, eth_rx->src_address);
1112   rewrite0_len = vec_len (rewrite0);
1113
1114   /* Figure out how much to rewind current data from adjacency. */
1115   vlib_buffer_advance (p0, -rewrite0_len);
1116   eth_tx = vlib_buffer_get_current (p0);
1117
1118   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1119   hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1120
1121   /* Send reply back through input interface */
1122   vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1123   next0 = ARP_REPLY_NEXT_REPLY_TX;
1124
1125   arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1126
1127   arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1128
1129   mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac,
1130                           hw_if0->hw_address);
1131   clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1132     if_addr0->data_u32;
1133
1134   /* Hardware must be ethernet-like. */
1135   ASSERT (vec_len (hw_if0->hw_address) == 6);
1136
1137   /* the rx nd tx ethernet headers wil overlap in the case
1138    * when we received a tagged VLAN=0 packet, but we are sending
1139    * back untagged */
1140   clib_memcpy_fast (eth_tx, rewrite0, vec_len (rewrite0));
1141   vec_free (rewrite0);
1142
1143   return (next0);
1144 }
1145
/**
 * @brief arp-reply node function: answer ARP requests for local addresses
 *        and learn from replies / gratuitous ARPs.
 *
 * For each buffer in the frame, in order:
 *  - drop if the RX interface has no IPv4 FIB table (interface_no_table);
 *  - look up the target address (ip4_over_ethernet[1]) as a /32 and walk
 *    the sender address (ip4_over_ethernet[0]) through successively
 *    shorter prefixes until a source with the ATTACHED or CONNECTED flag
 *    is found or the default route entry is reached;
 *  - a sender covered by a LOCAL source is passed to the next feature
 *    with error0 = l3_src_address_is_local;
 *  - a sender that is not attached is dropped (l3_src_address_not_local);
 *  - a target that hits an adj-fib is dropped, after learning from it when
 *    sender and target addresses are equal;
 *  - a target that is not connected is passed to the next feature;
 *  - a mismatch between the frame's source MAC and the ARP sender MAC is
 *    dropped unless the packet is a reply whose MAC starts with
 *    vrrp_prefix;
 *  - replies are learned from when the target is local and then passed to
 *    the next feature; requests for a non-local target are passed on too;
 *  - an unnumbered mismatch or a request with sender == target is dropped;
 *  - otherwise the request is converted into a reply with arp_mk_reply().
 *
 * error0 starts as ETHERNET_ARP_ERROR_replies_sent, the first enumerator,
 * so "!error0" below means "no error recorded so far".
 *
 * @return the number of buffers in the frame
 */
1146 static uword
1147 arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1148 {
1149   ethernet_arp_main_t *am = &ethernet_arp_main;
1150   vnet_main_t *vnm = vnet_get_main ();
1151   u32 n_left_from, next_index, *from, *to_next;
1152   u32 n_replies_sent = 0;
1153
1154   from = vlib_frame_vector_args (frame);
1155   n_left_from = frame->n_vectors;
1156   next_index = node->cached_next_index;
1157
1158   if (node->flags & VLIB_NODE_FLAG_TRACE)
1159     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1160                                    /* stride */ 1,
1161                                    sizeof (ethernet_arp_input_trace_t));
1162
1163   while (n_left_from > 0)
1164     {
1165       u32 n_left_to_next;
1166
1167       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1168
1169       while (n_left_from > 0 && n_left_to_next > 0)
1170         {
1171           vlib_buffer_t *p0;
1172           ethernet_arp_header_t *arp0;
1173           ethernet_header_t *eth_rx;
1174           const ip4_address_t *if_addr0;
1175           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
1176           u8 dst_is_local0, is_unnum0, is_vrrp_reply0;
1177           fib_node_index_t dst_fei, src_fei;
1178           const fib_prefix_t *pfx0;
1179           fib_entry_flag_t src_flags, dst_flags;
1180
1181           pi0 = from[0];
1182           to_next[0] = pi0;
1183           from += 1;
1184           to_next += 1;
1185           n_left_from -= 1;
1186           n_left_to_next -= 1;
1187
1188           p0 = vlib_get_buffer (vm, pi0);
1189           arp0 = vlib_buffer_get_current (p0);
1190           /* Locate the received ethernet header. */
1191           eth_rx = ethernet_buffer_get_header (p0);
1192
1193           next0 = ARP_REPLY_NEXT_DROP;
1194           error0 = ETHERNET_ARP_ERROR_replies_sent;
1195           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1196
1197           /* Check that IP address is local and matches incoming interface. */
1198           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1199           if (~0 == fib_index0)
1200             {
1201               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1202               goto drop;
1203
1204             }
          /* /32 lookup of the target address of the ARP packet */
1205           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1206                                           &arp0->ip4_over_ethernet[1].ip4,
1207                                           32);
1208           dst_flags = fib_entry_get_flags (dst_fei);
1209
1210           conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
1211
1212           /* Honor unnumbered interface, if any */
1213           is_unnum0 = sw_if_index0 != conn_sw_if_index0;
1214
1215           {
1216             /*
1217              * we're looking for FIB entries that indicate the source
1218              * is attached. There may be more specific non-attached
1219              * routes that match the source, but these do not influence
1220              * whether we respond to an ARP request, i.e. they do not
1221              * influence whether we are the correct way for the sender
1222              * to reach us, they only affect how we reach the sender.
1223              */
1224             fib_entry_t *src_fib_entry;
1225             const fib_prefix_t *pfx;
1226             fib_entry_src_t *src;
1227             fib_source_t source;
1228             int attached;
1229             int mask;
1230
1231             mask = 32;
1232             attached = 0;
1233
1234             do
1235               {
1236                 src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1237                                                 &arp0->
1238                                                 ip4_over_ethernet[0].ip4,
1239                                                 mask);
1240                 src_fib_entry = fib_entry_get (src_fei);
1241
1242                 /*
1243                  * It's possible that the source that provides the
1244                  * flags we need, or the flags we must not have,
1245                  * is not the best source, so check them all.
1246                  */
1247                 /* *INDENT-OFF* */
1248                 FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
1249                 ({
1250                   src_flags = fib_entry_get_flags_for_source (src_fei, source);
1251
1252                   /* Reject requests/replies with our local interface
1253                      address. */
1254                   if (FIB_ENTRY_FLAG_LOCAL & src_flags)
1255                     {
1256                       error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
1257                       /*
1258                        * When VPP has an interface whose address is also
1259                        * applied to a TAP interface on the host, then VPP's
1260                        * TAP interface will be unnumbered  to the 'real'
1261                        * interface and do proxy ARP from the host.
1262                        * The curious aspect of this setup is that ARP requests
1263                        * from the host will come from the VPP's own address.
1264                        * So don't drop immediately here, instead go see if this
1265                        * is a proxy ARP case.
1266                        */
1267                       goto next_feature;
1268                     }
1269                   /* A Source must also be local to subnet of matching
1270                    * interface address. */
1271                   if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
1272                       (FIB_ENTRY_FLAG_CONNECTED & src_flags))
1273                     {
1274                       attached = 1;
1275                       break;
1276                     }
1277                   /*
1278                    * else
1279                    *  The packet was sent from an address that is not
1280                    *  connected nor attached i.e. it is not from an
1281                    *  address that is covered by a link's sub-net,
1282                    *  nor is it a already learned host resp.
1283                    */
1284                 }));
1285                 /* *INDENT-ON* */
1286
1287                 /*
1288                  * shorter mask lookup for the next iteration.
1289                  */
1290                 pfx = fib_entry_get_prefix (src_fei);
1291                 mask = pfx->fp_len - 1;
1292
1293                 /*
1294                  * continue until we hit the default route or we find
1295                  * the attached we are looking for. The most likely
1296                  * outcome is we find the attached with the first source
1297                  * on the first lookup.
1298                  */
1299               }
1300             while (!attached &&
1301                    !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
1302
1303             if (!attached)
1304               {
1305                 /*
1306                  * the matching route is a not attached, i.e. it was
1307                  * added as a result of routing, rather than interface/ARP
1308                  * configuration. If the matching route is not a host route
1309                  * (i.e. a /32)
1310                  */
1311                 error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
1312                 goto drop;
1313               }
1314           }
1315
1316           if (fib_entry_is_sourced (dst_fei, FIB_SOURCE_ADJ))
1317             {
1318               /*
1319                * We matched an adj-fib on the source subnet (a /32 previously
1320                * added as a result of ARP). If this request is a gratuitous
1321                * ARP, then learn from it.
1322                * The check for matching an adj-fib, is to prevent hosts
1323                * from spamming us with gratuitous ARPS that might otherwise
1324                * blow our ARP cache
1325                */
1326               if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1327                   arp0->ip4_over_ethernet[1].ip4.as_u32)
1328                 error0 = arp_learn (vnm, am, sw_if_index0,
1329                                     &arp0->ip4_over_ethernet[0]);
1330               goto drop;
1331             }
1332           else if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
1333             {
1334               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1335               goto next_feature;
1336             }
1337
1338           if (sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
1339             {
1340               /*
1341                * The interface the ARP was received on is not the interface
1342                * on which the covering prefix is configured. Maybe this is a
1343                * case for unnumbered.
1344                */
1345               is_unnum0 = 1;
1346             }
1347
1348           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
1349           pfx0 = fib_entry_get_prefix (dst_fei);
1350           if_addr0 = &pfx0->fp_addr.ip4;
1351
1352           is_vrrp_reply0 =
1353             ((arp0->opcode ==
1354               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
1355              &&
1356              (!memcmp
1357               (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix,
1358                sizeof (vrrp_prefix))));
1359
1360           /* Trash ARP packets whose ARP-level source addresses do not
1361              match their L2-frame-level source addresses, unless it's
1362              a reply from a VRRP virtual router */
1363           if (!ethernet_mac_address_equal
1364               (eth_rx->src_address,
1365                arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
1366             {
1367               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1368               goto drop;
1369             }
1370
1371           /* Learn or update sender's mapping only for replies to addresses
1372            * that are local to the subnet */
1373           if (arp0->opcode ==
1374               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
1375             {
1376               if (dst_is_local0)
1377                 error0 = arp_learn (vnm, am, sw_if_index0,
1378                                     &arp0->ip4_over_ethernet[0]);
1379               else
1380                 /* a reply for a non-local destination could be a GARP.
1381                  * GARPs for hosts we know were handled above, so this one
1382                  * we drop */
1383                 error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1384
1385               goto next_feature;
1386             }
1387           else if (arp0->opcode ==
1388                    clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request) &&
1389                    (dst_is_local0 == 0))
1390             {
1391               goto next_feature;
1392             }
1393           if (is_unnum0)
1394             {
1395               if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
1396                 {
1397                   error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
1398                   goto drop;
1399                 }
1400             }
1401           if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1402               arp0->ip4_over_ethernet[1].ip4.as_u32)
1403             {
1404               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1405               goto drop;
1406             }
1407
1408           next0 = arp_mk_reply (vnm, p0, sw_if_index0,
1409                                 if_addr0, arp0, eth_rx);
1410
1411           /* We are going to reply to this request, so, in the absence of
1412              errors, learn the sender. arp_mk_reply() has moved the
1413              original sender to ip4_over_ethernet[1]. */
1414           if (!error0)
1415             error0 = arp_learn (vnm, am, sw_if_index0,
1416                                 &arp0->ip4_over_ethernet[1]);
1417
1418           n_replies_sent += 1;
1419           goto enqueue;
1420
          /* NOTE(review): on this path error0 is not written to p0->error,
           * so the reason recorded above is not carried with the buffer -
           * confirm this is intended. */
1421         next_feature:
1422           vnet_feature_next (&next0, p0);
1423           goto enqueue;
1424
1425         drop:
1426           p0->error = node->errors[error0];
1427
1428         enqueue:
1429           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1430                                            n_left_to_next, pi0, next0);
1431         }
1432
1433       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1434     }
1435
1436   vlib_error_count (vm, node->node_index,
1437                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
1438
1439   return frame->n_vectors;
1440 }
1440
1441 static uword
1442 arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1443 {
1444   ethernet_arp_main_t *am = &ethernet_arp_main;
1445   vnet_main_t *vnm = vnet_get_main ();
1446   u32 n_left_from, next_index, *from, *to_next;
1447   u32 n_arp_replies_sent = 0;
1448
1449   from = vlib_frame_vector_args (frame);
1450   n_left_from = frame->n_vectors;
1451   next_index = node->cached_next_index;
1452
1453   if (node->flags & VLIB_NODE_FLAG_TRACE)
1454     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
1455                                    /* stride */ 1,
1456                                    sizeof (ethernet_arp_input_trace_t));
1457
1458   while (n_left_from > 0)
1459     {
1460       u32 n_left_to_next;
1461
1462       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1463
1464       while (n_left_from > 0 && n_left_to_next > 0)
1465         {
1466           vlib_buffer_t *p0;
1467           ethernet_arp_header_t *arp0;
1468           ethernet_header_t *eth_rx;
1469           ip4_address_t proxy_src;
1470           u32 pi0, error0, next0, sw_if_index0, fib_index0;
1471           u8 is_request0;
1472           ethernet_proxy_arp_t *pa;
1473
1474           pi0 = from[0];
1475           to_next[0] = pi0;
1476           from += 1;
1477           to_next += 1;
1478           n_left_from -= 1;
1479           n_left_to_next -= 1;
1480
1481           p0 = vlib_get_buffer (vm, pi0);
1482           arp0 = vlib_buffer_get_current (p0);
1483           /* Fill in ethernet header. */
1484           eth_rx = ethernet_buffer_get_header (p0);
1485
1486           is_request0 = arp0->opcode
1487             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
1488
1489           error0 = ETHERNET_ARP_ERROR_replies_sent;
1490           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1491           next0 = ARP_REPLY_NEXT_DROP;
1492
1493           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1494           if (~0 == fib_index0)
1495             {
1496               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1497             }
1498
1499           if (0 == error0 && is_request0)
1500             {
1501               u32 this_addr = clib_net_to_host_u32
1502                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1503
1504               vec_foreach (pa, am->proxy_arps)
1505               {
1506                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr.as_u32);
1507                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr.as_u32);
1508
1509                 /* an ARP request hit in the proxy-arp table? */
1510                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1511                     (fib_index0 == pa->fib_index))
1512                   {
1513                     proxy_src.as_u32 =
1514                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1515
1516                     /*
1517                      * change the interface address to the proxied
1518                      */
1519                     n_arp_replies_sent++;
1520
1521                     next0 =
1522                       arp_mk_reply (vnm, p0, sw_if_index0, &proxy_src, arp0,
1523                                     eth_rx);
1524                   }
1525               }
1526             }
1527           else
1528             {
1529               p0->error = node->errors[error0];
1530             }
1531
1532           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1533                                            n_left_to_next, pi0, next0);
1534         }
1535
1536       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1537     }
1538
1539   vlib_error_count (vm, node->node_index,
1540                     ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
1541
1542   return frame->n_vectors;
1543 }
1544
1545 static char *ethernet_arp_error_strings[] = {
1546 #define _(sym,string) string,
1547   foreach_ethernet_arp_error
1548 #undef _
1549 };
1550
1551 /* *INDENT-OFF* */
1552
1553 /* Built-in ARP rx feature path definition */
1554 VNET_FEATURE_ARC_INIT (arp_feat, static) =
1555 {
1556   .arc_name = "arp",
1557   .start_nodes = VNET_FEATURES ("arp-input"),
1558   .last_in_arc = "error-drop",
1559   .arc_index_ptr = &ethernet_arp_main.feature_arc_index,
1560 };
1561
1562 VLIB_REGISTER_NODE (arp_input_node, static) =
1563 {
1564   .function = arp_input,
1565   .name = "arp-input",
1566   .vector_size = sizeof (u32),
1567   .n_errors = ETHERNET_ARP_N_ERROR,
1568   .error_strings = ethernet_arp_error_strings,
1569   .n_next_nodes = ARP_INPUT_N_NEXT,
1570   .next_nodes = {
1571     [ARP_INPUT_NEXT_DROP] = "error-drop",
1572   },
1573   .format_buffer = format_ethernet_arp_header,
1574   .format_trace = format_ethernet_arp_input_trace,
1575 };
1576
1577 VLIB_REGISTER_NODE (arp_reply_node, static) =
1578 {
1579   .function = arp_reply,
1580   .name = "arp-reply",
1581   .vector_size = sizeof (u32),
1582   .n_errors = ETHERNET_ARP_N_ERROR,
1583   .error_strings = ethernet_arp_error_strings,
1584   .n_next_nodes = ARP_REPLY_N_NEXT,
1585   .next_nodes = {
1586     [ARP_REPLY_NEXT_DROP] = "error-drop",
1587     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1588   },
1589   .format_buffer = format_ethernet_arp_header,
1590   .format_trace = format_ethernet_arp_input_trace,
1591 };
1592
1593 VLIB_REGISTER_NODE (arp_proxy_node, static) =
1594 {
1595   .function = arp_proxy,
1596   .name = "arp-proxy",
1597   .vector_size = sizeof (u32),
1598   .n_errors = ETHERNET_ARP_N_ERROR,
1599   .error_strings = ethernet_arp_error_strings,
1600   .n_next_nodes = ARP_REPLY_N_NEXT,
1601   .next_nodes = {
1602     [ARP_REPLY_NEXT_DROP] = "error-drop",
1603     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
1604   },
1605   .format_buffer = format_ethernet_arp_header,
1606   .format_trace = format_ethernet_arp_input_trace,
1607 };
1608
1609 VNET_FEATURE_INIT (arp_reply_feat_node, static) =
1610 {
1611   .arc_name = "arp",
1612   .node_name = "arp-reply",
1613   .runs_before = VNET_FEATURES ("error-drop"),
1614 };
1615
1616 VNET_FEATURE_INIT (arp_proxy_feat_node, static) =
1617 {
1618   .arc_name = "arp",
1619   .node_name = "arp-proxy",
1620   .runs_after = VNET_FEATURES ("arp-reply"),
1621   .runs_before = VNET_FEATURES ("error-drop"),
1622 };
1623
1624 VNET_FEATURE_INIT (arp_drop_feat_node, static) =
1625 {
1626   .arc_name = "arp",
1627   .node_name = "error-drop",
1628 };
1629
1630 /* *INDENT-ON* */
1631
1632 static int
1633 ip4_arp_entry_sort (void *a1, void *a2)
1634 {
1635   ethernet_arp_ip4_entry_t *e1 = a1;
1636   ethernet_arp_ip4_entry_t *e2 = a2;
1637
1638   int cmp;
1639   vnet_main_t *vnm = vnet_get_main ();
1640
1641   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1642   if (!cmp)
1643     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1644   return cmp;
1645 }
1646
1647 ethernet_arp_ip4_entry_t *
1648 ip4_neighbors_pool (void)
1649 {
1650   ethernet_arp_main_t *am = &ethernet_arp_main;
1651   return am->ip4_entry_pool;
1652 }
1653
1654 ethernet_arp_ip4_entry_t *
1655 ip4_neighbor_entries (u32 sw_if_index)
1656 {
1657   ethernet_arp_main_t *am = &ethernet_arp_main;
1658   ethernet_arp_ip4_entry_t *n, *ns = 0;
1659
1660   /* *INDENT-OFF* */
1661   pool_foreach (n, am->ip4_entry_pool, ({
1662     if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
1663       continue;
1664     vec_add1 (ns, n[0]);
1665   }));
1666   /* *INDENT-ON* */
1667
1668   if (ns)
1669     vec_sort_with_function (ns, ip4_arp_entry_sort);
1670   return ns;
1671 }
1672
1673 static clib_error_t *
1674 show_ip4_arp (vlib_main_t * vm,
1675               unformat_input_t * input, vlib_cli_command_t * cmd)
1676 {
1677   vnet_main_t *vnm = vnet_get_main ();
1678   ethernet_arp_main_t *am = &ethernet_arp_main;
1679   ethernet_arp_ip4_entry_t *e, *es;
1680   ethernet_proxy_arp_t *pa;
1681   clib_error_t *error = 0;
1682   u32 sw_if_index;
1683
1684   /* Filter entries by interface if given. */
1685   sw_if_index = ~0;
1686   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1687
1688   es = ip4_neighbor_entries (sw_if_index);
1689   if (es)
1690     {
1691       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1692       vec_foreach (e, es)
1693       {
1694         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1695       }
1696       vec_free (es);
1697     }
1698
1699   if (vec_len (am->proxy_arps))
1700     {
1701       vlib_cli_output (vm, "Proxy arps enabled for:");
1702       vec_foreach (pa, am->proxy_arps)
1703       {
1704         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1705                          pa->fib_index,
1706                          format_ip4_address, &pa->lo_addr,
1707                          format_ip4_address, &pa->hi_addr);
1708       }
1709     }
1710
1711   return error;
1712 }
1713
1714 /*?
1715  * Display all the IPv4 ARP entries.
1716  *
1717  * @cliexpar
1718  * Example of how to display the IPv4 ARP table:
1719  * @cliexstart{show ip arp}
1720  *    Time      FIB        IP4       Flags      Ethernet              Interface
1721  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1722  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1723  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1724  * Proxy arps enabled for:
1725  * Fib_index 0   6.0.0.1 - 6.0.0.11
1726  * @cliexend
1727  ?*/
1728 /* *INDENT-OFF* */
1729 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1730   .path = "show ip arp",
1731   .function = show_ip4_arp,
1732   .short_help = "show ip arp",
1733 };
1734 /* *INDENT-ON* */
1735
1736 typedef struct
1737 {
1738   pg_edit_t l2_type, l3_type;
1739   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1740   pg_edit_t opcode;
1741   struct
1742   {
1743     pg_edit_t mac;
1744     pg_edit_t ip4;
1745   } ip4_over_ethernet[2];
1746 } pg_ethernet_arp_header_t;
1747
1748 static inline void
1749 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1750 {
1751   /* Initialize fields that are not bit fields in the IP header. */
1752 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1753   _(l2_type);
1754   _(l3_type);
1755   _(n_l2_address_bytes);
1756   _(n_l3_address_bytes);
1757   _(opcode);
1758   _(ip4_over_ethernet[0].mac);
1759   _(ip4_over_ethernet[0].ip4);
1760   _(ip4_over_ethernet[1].mac);
1761   _(ip4_over_ethernet[1].ip4);
1762 #undef _
1763 }
1764
1765 uword
1766 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1767 {
1768   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1769   pg_ethernet_arp_header_t *p;
1770   u32 group_index;
1771
1772   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1773                             &group_index);
1774   pg_ethernet_arp_header_init (p);
1775
1776   /* Defaults. */
1777   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1778   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1779   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1780   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1781
1782   if (!unformat (input, "%U: %U/%U -> %U/%U",
1783                  unformat_pg_edit,
1784                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1785                  unformat_pg_edit,
1786                  unformat_mac_address_t, &p->ip4_over_ethernet[0].mac,
1787                  unformat_pg_edit,
1788                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1789                  unformat_pg_edit,
1790                  unformat_mac_address_t, &p->ip4_over_ethernet[1].mac,
1791                  unformat_pg_edit,
1792                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1793     {
1794       /* Free up any edits we may have added. */
1795       pg_free_edit_group (s);
1796       return 0;
1797     }
1798   return 1;
1799 }
1800
1801 clib_error_t *
1802 ip4_set_arp_limit (u32 arp_limit)
1803 {
1804   ethernet_arp_main_t *am = &ethernet_arp_main;
1805
1806   am->limit_arp_cache_size = arp_limit;
1807   return 0;
1808 }
1809
1810 /**
1811  * @brief Control Plane hook to remove an ARP entry
1812  */
1813 int
1814 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1815                                   u32 sw_if_index,
1816                                   const
1817                                   ethernet_arp_ip4_over_ethernet_address_t *
1818                                   a)
1819 {
1820   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1821     .sw_if_index = sw_if_index,
1822     .flags = ETHERNET_ARP_ARGS_REMOVE,
1823     .ip4 = a->ip4,
1824     .mac = a->mac,
1825   };
1826
1827   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1828                                (u8 *) & args, sizeof (args));
1829   return 0;
1830 }
1831
1832 /**
1833  * @brief publish wildcard arp event
1834  * @param sw_if_index The interface on which the ARP entries are acted
1835  */
1836 static int
1837 vnet_arp_wc_publish (u32 sw_if_index,
1838                      const ethernet_arp_ip4_over_ethernet_address_t * a)
1839 {
1840   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
1841     .flags = ETHERNET_ARP_ARGS_WC_PUB,
1842     .sw_if_index = sw_if_index,
1843     .ip4 = a->ip4,
1844     .mac = a->mac,
1845   };
1846
1847   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1848                                (u8 *) & args, sizeof (args));
1849   return 0;
1850 }
1851
1852 static void
1853 vnet_arp_wc_publish_internal (vnet_main_t * vnm,
1854                               vnet_arp_set_ip4_over_ethernet_rpc_args_t *
1855                               args)
1856 {
1857   vlib_main_t *vm = vlib_get_main ();
1858   ethernet_arp_main_t *am = &ethernet_arp_main;
1859   uword ni = am->wc_ip4_arp_publisher_node;
1860   uword et = am->wc_ip4_arp_publisher_et;
1861
1862   if (ni == (uword) ~ 0)
1863     return;
1864   wc_arp_report_t *r =
1865     vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r);
1866   r->ip.as_u32 = args->ip4.as_u32;
1867   r->sw_if_index = args->sw_if_index;
1868   mac_address_copy (&r->mac, &args->mac);
1869 }
1870
1871 void
1872 wc_arp_set_publisher_node (uword node_index, uword event_type)
1873 {
1874   ethernet_arp_main_t *am = &ethernet_arp_main;
1875   am->wc_ip4_arp_publisher_node = node_index;
1876   am->wc_ip4_arp_publisher_et = event_type;
1877 }
1878
1879 static void
1880 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e);
1881
1882 static int
1883 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1884                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1885                                            * args)
1886 {
1887   ethernet_arp_main_t *am = &ethernet_arp_main;
1888   ethernet_arp_ip4_entry_t *e;
1889   ethernet_arp_interface_t *eai;
1890
1891   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
1892     return 0;
1893
1894   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1895
1896   e = arp_entry_find (eai, &args->ip4);
1897
1898   if (NULL != e)
1899     {
1900       adj_nbr_walk_nh4 (e->sw_if_index,
1901                         &e->ip4_address, arp_mk_incomplete_walk, e);
1902
1903       /*
1904        * The difference between flush and unset, is that an unset
1905        * means delete for static and dynamic entries. A flush
1906        * means delete only for dynamic. Flushing is what the DP
1907        * does in response to interface events. unset is only done
1908        * by the control plane.
1909        */
1910       if (e->flags & IP_NEIGHBOR_FLAG_STATIC)
1911         {
1912           e->flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
1913         }
1914       else if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC)
1915         {
1916           arp_entry_free (eai, e);
1917         }
1918     }
1919   return (0);
1920 }
1921
1922 /*
1923  * arp_add_del_interface_address
1924  *
1925  * callback when an interface address is added or deleted
1926  */
1927 static void
1928 arp_enable_disable_interface (ip4_main_t * im,
1929                               uword opaque, u32 sw_if_index, u32 is_enable)
1930 {
1931   ethernet_arp_main_t *am = &ethernet_arp_main;
1932
1933   if (is_enable)
1934     arp_enable (am, sw_if_index);
1935   else
1936     arp_disable (am, sw_if_index);
1937 }
1938
1939 /*
1940  * arp_add_del_interface_address
1941  *
1942  * callback when an interface address is added or deleted
1943  */
1944 static void
1945 arp_add_del_interface_address (ip4_main_t * im,
1946                                uword opaque,
1947                                u32 sw_if_index,
1948                                ip4_address_t * address,
1949                                u32 address_length,
1950                                u32 if_address_index, u32 is_del)
1951 {
1952   /*
1953    * Flush the ARP cache of all entries covered by the address
1954    * that is being removed.
1955    */
1956   ethernet_arp_main_t *am = &ethernet_arp_main;
1957   ethernet_arp_ip4_entry_t *e;
1958
1959   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
1960     return;
1961
1962   if (is_del)
1963     {
1964       ethernet_arp_interface_t *eai;
1965       u32 i, *to_delete = 0;
1966       hash_pair_t *pair;
1967
1968       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
1969
1970       /* *INDENT-OFF* */
1971       hash_foreach_pair (pair, eai->arp_entries,
1972       ({
1973         e = pool_elt_at_index(am->ip4_entry_pool,
1974                               pair->value[0]);
1975         if (ip4_destination_matches_route (im, &e->ip4_address,
1976                                            address, address_length))
1977           {
1978             vec_add1 (to_delete, e - am->ip4_entry_pool);
1979           }
1980       }));
1981       /* *INDENT-ON* */
1982
1983       for (i = 0; i < vec_len (to_delete); i++)
1984         {
1985           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1986
1987           vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
1988             .ip4.as_u32 = e->ip4_address.as_u32,
1989             .sw_if_index = e->sw_if_index,
1990             .flags = ETHERNET_ARP_ARGS_FLUSH,
1991           };
1992           mac_address_copy (&delme.mac, &e->mac);
1993
1994           vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (),
1995                                                      &delme);
1996         }
1997
1998       vec_free (to_delete);
1999     }
2000 }
2001
2002 static void
2003 arp_table_bind (ip4_main_t * im,
2004                 uword opaque,
2005                 u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
2006 {
2007   ethernet_arp_main_t *am = &ethernet_arp_main;
2008   ethernet_arp_interface_t *eai;
2009   ethernet_arp_ip4_entry_t *e;
2010   hash_pair_t *pair;
2011
2012   /*
2013    * the IP table that the interface is bound to has changed.
2014    * reinstall all the adj fibs.
2015    */
2016
2017   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
2018     return;
2019
2020   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2021
2022   /* *INDENT-OFF* */
2023   hash_foreach_pair (pair, eai->arp_entries,
2024   ({
2025     e = pool_elt_at_index(am->ip4_entry_pool,
2026                           pair->value[0]);
2027     /*
2028      * remove the adj-fib from the old table and add to the new
2029      */
2030     arp_adj_fib_remove(e, old_fib_index);
2031     arp_adj_fib_add(e, new_fib_index);
2032   }));
2033   /* *INDENT-ON* */
2034
2035 }
2036
2037 static clib_error_t *
2038 ethernet_arp_init (vlib_main_t * vm)
2039 {
2040   ethernet_arp_main_t *am = &ethernet_arp_main;
2041   ip4_main_t *im = &ip4_main;
2042   pg_node_t *pn;
2043
2044   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
2045
2046   pn = pg_get_node (arp_input_node.index);
2047   pn->unformat_edit = unformat_pg_arp_header;
2048
2049   am->opcode_by_name = hash_create_string (0, sizeof (uword));
2050 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
2051   foreach_ethernet_arp_opcode;
2052 #undef _
2053
2054   /* $$$ configurable */
2055   am->limit_arp_cache_size = 50000;
2056
2057   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
2058   am->mac_changes_by_address = hash_create (0, sizeof (uword));
2059   am->wc_ip4_arp_publisher_node = (uword) ~ 0;
2060
2061   /* don't trace ARP error packets */
2062   {
2063     vlib_node_runtime_t *rt =
2064       vlib_node_get_runtime (vm, arp_input_node.index);
2065
2066 #define _(a,b)                                  \
2067     vnet_pcap_drop_trace_filter_add_del         \
2068         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
2069          1 /* is_add */);
2070     foreach_ethernet_arp_error
2071 #undef _
2072   }
2073
2074   ip4_add_del_interface_address_callback_t cb;
2075   cb.function = arp_add_del_interface_address;
2076   cb.function_opaque = 0;
2077   vec_add1 (im->add_del_interface_address_callbacks, cb);
2078
2079   ip4_enable_disable_interface_callback_t cbe;
2080   cbe.function = arp_enable_disable_interface;
2081   cbe.function_opaque = 0;
2082   vec_add1 (im->enable_disable_interface_callbacks, cbe);
2083
2084   ip4_table_bind_callback_t cbt;
2085   cbt.function = arp_table_bind;
2086   cbt.function_opaque = 0;
2087   vec_add1 (im->table_bind_callbacks, cbt);
2088
2089   return 0;
2090 }
2091 /* *INDENT-OFF* */
2092 VLIB_INIT_FUNCTION (ethernet_arp_init) =
2093 {
2094   .runs_after = VLIB_INITS("ethernet_init"),
2095 };
2096 /* *INDENT-ON* */
2097
2098 static void
2099 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
2100 {
2101   ethernet_arp_main_t *am = &ethernet_arp_main;
2102
2103   arp_adj_fib_remove
2104     (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
2105   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
2106   pool_put (am->ip4_entry_pool, e);
2107 }
2108
2109 static inline int
2110 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
2111                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
2112                                            * args)
2113 {
2114   ethernet_arp_main_t *am = &ethernet_arp_main;
2115   ethernet_arp_ip4_entry_t *e;
2116   ethernet_arp_interface_t *eai;
2117
2118   if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
2119     return 0;
2120
2121   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2122
2123   e = arp_entry_find (eai, &args->ip4);
2124
2125   if (NULL != e)
2126     {
2127       adj_nbr_walk_nh4 (e->sw_if_index,
2128                         &e->ip4_address, arp_mk_incomplete_walk, e);
2129       arp_entry_free (eai, e);
2130     }
2131
2132   return 0;
2133 }
2134
2135
2136 static int
2137 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
2138                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
2139                                               * args)
2140 {
2141   ethernet_arp_main_t *am = &ethernet_arp_main;
2142   ethernet_arp_ip4_entry_t *e;
2143   ethernet_arp_interface_t *eai;
2144
2145   arp_enable (am, args->sw_if_index);
2146   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
2147
2148   e = arp_entry_find (eai, &args->ip4);
2149
2150   if (NULL != e)
2151     {
2152       adj_nbr_walk_nh4 (e->sw_if_index,
2153                         &e->ip4_address, arp_mk_complete_walk, e);
2154     }
2155   return (0);
2156 }
2157
2158 static void
2159 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
2160                                     * a)
2161 {
2162   vnet_main_t *vm = vnet_get_main ();
2163   ASSERT (vlib_get_thread_index () == 0);
2164
2165   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
2166     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
2167   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
2168     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
2169   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
2170     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
2171   else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB)
2172     vnet_arp_wc_publish_internal (vm, a);
2173   else
2174     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
2175 }
2176
2177 /**
2178  * @brief Invoked when the interface's admin state changes
2179  */
2180 static clib_error_t *
2181 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
2182                                    u32 sw_if_index, u32 flags)
2183 {
2184   ethernet_arp_main_t *am = &ethernet_arp_main;
2185   ethernet_arp_ip4_entry_t *e;
2186   u32 i, *to_update = 0;
2187
2188   /* *INDENT-OFF* */
2189   pool_foreach (e, am->ip4_entry_pool,
2190   ({
2191     if (e->sw_if_index == sw_if_index)
2192       vec_add1 (to_update,
2193                 e - am->ip4_entry_pool);
2194   }));
2195   /* *INDENT-ON* */
2196
2197   for (i = 0; i < vec_len (to_update); i++)
2198     {
2199       e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
2200
2201       vnet_arp_set_ip4_over_ethernet_rpc_args_t update_me = {
2202         .ip4.as_u32 = e->ip4_address.as_u32,
2203         .sw_if_index = e->sw_if_index,
2204       };
2205       mac_address_copy (&update_me.mac, &e->mac);
2206
2207       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
2208         {
2209           update_me.flags = ETHERNET_ARP_ARGS_POPULATE;
2210           vnet_arp_populate_ip4_over_ethernet_internal (vnm, &update_me);
2211         }
2212       else
2213         {
2214           update_me.flags = ETHERNET_ARP_ARGS_FLUSH;
2215           vnet_arp_flush_ip4_over_ethernet_internal (vnm, &update_me);
2216         }
2217     }
2218   vec_free (to_update);
2219
2220   return 0;
2221 }
2222
2223 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
2224
2225 static void
2226 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
2227 {
2228   u8 old;
2229   int i;
2230
2231   for (i = 3; i >= 0; i--)
2232     {
2233       old = a->ip4.as_u8[i];
2234       a->ip4.as_u8[i] += 1;
2235       if (old < a->ip4.as_u8[i])
2236         break;
2237     }
2238
2239   for (i = 5; i >= 0; i--)
2240     {
2241       old = a->mac.bytes[i];
2242       a->mac.bytes[i] += 1;
2243       if (old < a->mac.bytes[i])
2244         break;
2245     }
2246 }
2247
2248 int
2249 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
2250                                 u32 sw_if_index,
2251                                 const ethernet_arp_ip4_over_ethernet_address_t
2252                                 * a, ip_neighbor_flags_t flags)
2253 {
2254   vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
2255     .sw_if_index = sw_if_index,
2256     .nbr_flags = flags,
2257     .flags = 0,
2258     .ip4.as_u32 = a->ip4.as_u32,
2259     .mac = a->mac,
2260   };
2261
2262   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
2263                                (u8 *) & args, sizeof (args));
2264   return 0;
2265 }
2266
2267 void
2268 proxy_arp_walk (proxy_arp_walk_t cb, void *data)
2269 {
2270   ethernet_arp_main_t *am = &ethernet_arp_main;
2271   ethernet_proxy_arp_t *pa;
2272
2273   vec_foreach (pa, am->proxy_arps)
2274   {
2275     if (!cb (&pa->lo_addr, &pa->hi_addr, pa->fib_index, data))
2276       break;
2277   }
2278 }
2279
2280 int
2281 vnet_proxy_arp_enable_disable (vnet_main_t * vnm, u32 sw_if_index, u8 enable)
2282 {
2283   ethernet_arp_main_t *am = &ethernet_arp_main;
2284   ethernet_arp_interface_t *eai;
2285
2286   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
2287
2288   eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
2289
2290   if (enable)
2291     {
2292       if (!eai->proxy_enabled)
2293         {
2294           vnet_feature_enable_disable ("arp", "arp-proxy",
2295                                        sw_if_index, 1, NULL, 0);
2296         }
2297       eai->proxy_enabled = 1;
2298     }
2299   else
2300     {
2301       if (eai->proxy_enabled)
2302         {
2303           vnet_feature_enable_disable ("arp", "arp-proxy",
2304                                        sw_if_index, 0, NULL, 0);
2305         }
2306       eai->proxy_enabled = 0;
2307     }
2308
2309   return (0);
2310 }
2311
2312 int
2313 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
2314                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
2315 {
2316   ethernet_arp_main_t *am = &ethernet_arp_main;
2317   ethernet_proxy_arp_t *pa;
2318   u32 found_at_index = ~0;
2319
2320   vec_foreach (pa, am->proxy_arps)
2321   {
2322     if (pa->lo_addr.as_u32 == lo_addr->as_u32 &&
2323         pa->hi_addr.as_u32 == hi_addr->as_u32 && pa->fib_index == fib_index)
2324       {
2325         found_at_index = pa - am->proxy_arps;
2326         break;
2327       }
2328   }
2329
2330   if (found_at_index != ~0)
2331     {
2332       /* Delete, otherwise it's already in the table */
2333       if (is_del)
2334         vec_delete (am->proxy_arps, 1, found_at_index);
2335       return 0;
2336     }
2337   /* delete, no such entry */
2338   if (is_del)
2339     return VNET_API_ERROR_NO_SUCH_ENTRY;
2340
2341   /* add, not in table */
2342   vec_add2 (am->proxy_arps, pa, 1);
2343   pa->lo_addr.as_u32 = lo_addr->as_u32;
2344   pa->hi_addr.as_u32 = hi_addr->as_u32;
2345   pa->fib_index = fib_index;
2346   return 0;
2347 }
2348
2349 void
2350 proxy_arp_intfc_walk (proxy_arp_intf_walk_t cb, void *data)
2351 {
2352   ethernet_arp_main_t *am = &ethernet_arp_main;
2353   ethernet_arp_interface_t *eai;
2354
2355   vec_foreach (eai, am->ethernet_arp_by_sw_if_index)
2356   {
2357     if (eai->proxy_enabled)
2358       cb (eai - am->ethernet_arp_by_sw_if_index, data);
2359   }
2360 }
2361
2362 /*
2363  * Remove any proxy arp entries associated with the
2364  * specified fib.
2365  */
2366 int
2367 vnet_proxy_arp_fib_reset (u32 fib_id)
2368 {
2369   ethernet_arp_main_t *am = &ethernet_arp_main;
2370   ethernet_proxy_arp_t *pa;
2371   u32 *entries_to_delete = 0;
2372   u32 fib_index;
2373   int i;
2374
2375   fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
2376   if (~0 == fib_index)
2377     return VNET_API_ERROR_NO_SUCH_ENTRY;
2378
2379   vec_foreach (pa, am->proxy_arps)
2380   {
2381     if (pa->fib_index == fib_index)
2382       {
2383         vec_add1 (entries_to_delete, pa - am->proxy_arps);
2384       }
2385   }
2386
2387   for (i = 0; i < vec_len (entries_to_delete); i++)
2388     {
2389       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
2390     }
2391
2392   vec_free (entries_to_delete);
2393
2394   return 0;
2395 }
2396
2397 static clib_error_t *
2398 ip_arp_add_del_command_fn (vlib_main_t * vm,
2399                            unformat_input_t * input, vlib_cli_command_t * cmd)
2400 {
2401   vnet_main_t *vnm = vnet_get_main ();
2402   u32 sw_if_index;
2403   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
2404   int addr_valid = 0;
2405   int is_del = 0;
2406   int count = 1;
2407   u32 fib_index = 0;
2408   u32 fib_id;
2409   int is_proxy = 0;
2410   ip_neighbor_flags_t flags;
2411
2412   flags = IP_NEIGHBOR_FLAG_NONE;
2413
2414   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2415     {
2416       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
2417       if (unformat (input, "%U %U %U",
2418                     unformat_vnet_sw_interface, vnm, &sw_if_index,
2419                     unformat_ip4_address, &addr.ip4,
2420                     unformat_mac_address_t, &addr.mac))
2421         addr_valid = 1;
2422
2423       else if (unformat (input, "delete") || unformat (input, "del"))
2424         is_del = 1;
2425
2426       else if (unformat (input, "static"))
2427         flags |= IP_NEIGHBOR_FLAG_STATIC;
2428
2429       else if (unformat (input, "no-fib-entry"))
2430         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
2431
2432       else if (unformat (input, "count %d", &count))
2433         ;
2434
2435       else if (unformat (input, "fib-id %d", &fib_id))
2436         {
2437           fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
2438
2439           if (~0 == fib_index)
2440             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
2441         }
2442
2443       else if (unformat (input, "proxy %U - %U",
2444                          unformat_ip4_address, &lo_addr.ip4,
2445                          unformat_ip4_address, &hi_addr.ip4))
2446         is_proxy = 1;
2447       else
2448         break;
2449     }
2450
2451   if (is_proxy)
2452     {
2453       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
2454                                      fib_index, is_del);
2455       return 0;
2456     }
2457
2458   if (addr_valid)
2459     {
2460       int i;
2461
2462       for (i = 0; i < count; i++)
2463         {
2464           if (is_del == 0)
2465             {
2466               uword event_type, *event_data = 0;
2467
2468               /* Park the debug CLI until the arp entry is installed */
2469               vnet_register_ip4_arp_resolution_event
2470                 (vnm, &addr.ip4, vlib_current_process (vm),
2471                  1 /* type */ , 0 /* data */ );
2472
2473               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, &addr, flags);
2474
2475               vlib_process_wait_for_event (vm);
2476               event_type = vlib_process_get_events (vm, &event_data);
2477               vec_reset_length (event_data);
2478               if (event_type != 1)
2479                 clib_warning ("event type %d unexpected", event_type);
2480             }
2481           else
2482             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
2483
2484           increment_ip4_and_mac_address (&addr);
2485         }
2486     }
2487   else
2488     {
2489       return clib_error_return (0, "unknown input `%U'",
2490                                 format_unformat_error, input);
2491     }
2492
2493   return 0;
2494 }
2495
2496 /* *INDENT-OFF* */
2497 /*?
2498  * Add or delete IPv4 ARP cache entries.
2499  *
2500  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
2501  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
2502  * any order and combination.
2503  *
2504  * @cliexpar
2505  * @parblock
2506  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
2507  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
2508  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2509  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
2510  *
2511  * To add or delete an IPv4 ARP cache entry to or from a specific fib
2512  * table:
2513  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2514  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2515  *
2516  * Add or delete IPv4 static ARP cache entries as follows:
2517  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2518  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2519  *
2520  * For testing / debugging purposes, the 'set ip arp' command can add or
2521  * delete multiple entries. Supply the 'count N' parameter:
2522  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
2523  * @endparblock
2524  ?*/
2525 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
2526   .path = "set ip arp",
2527   .short_help =
2528   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
2529   .function = ip_arp_add_del_command_fn,
2530 };
2531 /* *INDENT-ON* */
2532
2533 static clib_error_t *
2534 set_int_proxy_arp_command_fn (vlib_main_t * vm,
2535                               unformat_input_t *
2536                               input, vlib_cli_command_t * cmd)
2537 {
2538   vnet_main_t *vnm = vnet_get_main ();
2539   u32 sw_if_index;
2540   int enable = 0;
2541
2542   sw_if_index = ~0;
2543
2544   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2545     {
2546       if (unformat (input, "%U", unformat_vnet_sw_interface,
2547                     vnm, &sw_if_index))
2548         ;
2549       else if (unformat (input, "enable") || unformat (input, "on"))
2550         enable = 1;
2551       else if (unformat (input, "disable") || unformat (input, "off"))
2552         enable = 0;
2553       else
2554         break;
2555     }
2556
2557   if (~0 == sw_if_index)
2558     return clib_error_return (0, "unknown input '%U'",
2559                               format_unformat_error, input);
2560
2561   vnet_proxy_arp_enable_disable (vnm, sw_if_index, enable);
2562
2563   return 0;
2564 }
2565
2566 /* *INDENT-OFF* */
2567 /*?
2568  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2569  * requests for the indicated address range. Multiple proxy-arp
2570  * ranges may be provisioned.
2571  *
2572  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2573  * Also, the underlying implementation has not been performance-tuned.
2574  * Avoid creating an unnecessarily large set of ranges.
2575  *
2576  * @cliexpar
2577  * To enable proxy arp on a range of addresses, use:
2578  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2579  * Append 'del' to delete a range of proxy ARP addresses:
2580  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2581  * You must then specifically enable proxy arp on individual interfaces:
2582  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2583  * To disable proxy arp on an individual interface:
2584  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2585  ?*/
2586 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2587   .path = "set interface proxy-arp",
2588   .short_help =
2589   "set interface proxy-arp <intfc> [enable|disable]",
2590   .function = set_int_proxy_arp_command_fn,
2591 };
2592 /* *INDENT-ON* */
2593
2594
2595 /*
2596  * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
2597  * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
2598  */
2599 typedef enum
2600 {
2601   ARP_TERM_NEXT_L2_OUTPUT,
2602   ARP_TERM_NEXT_DROP,
2603   ARP_TERM_N_NEXT,
2604 } arp_term_next_t;
2605
2606 u32 arp_term_next_node_index[32];
2607
2608 static uword
2609 arp_term_l2bd (vlib_main_t * vm,
2610                vlib_node_runtime_t * node, vlib_frame_t * frame)
2611 {
2612   l2input_main_t *l2im = &l2input_main;
2613   u32 n_left_from, next_index, *from, *to_next;
2614   u32 n_replies_sent = 0;
2615   u16 last_bd_index = ~0;
2616   l2_bridge_domain_t *last_bd_config = 0;
2617   l2_input_config_t *cfg0;
2618
2619   from = vlib_frame_vector_args (frame);
2620   n_left_from = frame->n_vectors;
2621   next_index = node->cached_next_index;
2622
2623   while (n_left_from > 0)
2624     {
2625       u32 n_left_to_next;
2626
2627       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2628
2629       while (n_left_from > 0 && n_left_to_next > 0)
2630         {
2631           vlib_buffer_t *p0;
2632           ethernet_header_t *eth0;
2633           ethernet_arp_header_t *arp0;
2634           ip6_header_t *iph0;
2635           u8 *l3h0;
2636           u32 pi0, error0, next0, sw_if_index0;
2637           u16 ethertype0;
2638           u16 bd_index0;
2639           u32 ip0;
2640           u8 *macp0;
2641
2642           pi0 = from[0];
2643           to_next[0] = pi0;
2644           from += 1;
2645           to_next += 1;
2646           n_left_from -= 1;
2647           n_left_to_next -= 1;
2648
2649           p0 = vlib_get_buffer (vm, pi0);
2650           // Terminate only local (SHG == 0) ARP
2651           if (vnet_buffer (p0)->l2.shg != 0)
2652             goto next_l2_feature;
2653
2654           eth0 = vlib_buffer_get_current (p0);
2655           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2656           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2657           arp0 = (ethernet_arp_header_t *) l3h0;
2658
2659           if (ethertype0 != ETHERNET_TYPE_ARP)
2660             goto check_ip6_nd;
2661
2662           if ((arp0->opcode !=
2663                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
2664               (arp0->opcode !=
2665                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
2666             goto check_ip6_nd;
2667
2668           /* Must be ARP request/reply packet here */
2669           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2670                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2671             {
2672               u8 *t0 = vlib_add_trace (vm, node, p0,
2673                                        sizeof (ethernet_arp_input_trace_t));
2674               clib_memcpy_fast (t0, l3h0,
2675                                 sizeof (ethernet_arp_input_trace_t));
2676             }
2677
2678           error0 = 0;
2679           error0 =
2680             (arp0->l2_type !=
2681              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2682              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2683           error0 =
2684             (arp0->l3_type !=
2685              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2686              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2687
2688           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2689
2690           if (error0)
2691             goto drop;
2692
2693           /* Trash ARP packets whose ARP-level source addresses do not
2694              match, or if requester address is mcast */
2695           if (PREDICT_FALSE
2696               (!ethernet_mac_address_equal (eth0->src_address,
2697                                             arp0->ip4_over_ethernet[0].
2698                                             mac.bytes))
2699               || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
2700             {
2701               /* VRRP virtual MAC may be different to SMAC in ARP reply */
2702               if (!ethernet_mac_address_equal
2703                   (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix))
2704                 {
2705                   error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2706                   goto drop;
2707                 }
2708             }
2709           if (PREDICT_FALSE
2710               (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
2711             {
2712               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
2713               goto drop;
2714             }
2715
2716           /* Check if anyone want ARP request events for L2 BDs */
2717           {
2718             ethernet_arp_main_t *am = &ethernet_arp_main;
2719             if (am->wc_ip4_arp_publisher_node != (uword) ~ 0)
2720               vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]);
2721           }
2722
2723           /* lookup BD mac_by_ip4 hash table for MAC entry */
2724           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2725           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2726           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2727                              || (last_bd_index == (u16) ~ 0)))
2728             {
2729               last_bd_index = bd_index0;
2730               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2731             }
2732           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2733
2734           if (PREDICT_FALSE (!macp0))
2735             goto next_l2_feature;       /* MAC not found */
2736           if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2737                              arp0->ip4_over_ethernet[1].ip4.as_u32))
2738             goto next_l2_feature;       /* GARP */
2739
2740           /* MAC found, send ARP reply -
2741              Convert ARP request packet to ARP reply */
2742           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2743           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2744           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2745           mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
2746           clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
2747           clib_memcpy_fast (eth0->src_address, macp0, 6);
2748           n_replies_sent += 1;
2749
2750         output_response:
2751           /* For BVI, need to use l2-fwd node to send ARP reply as
2752              l2-output node cannot output packet to BVI properly */
2753           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2754           if (PREDICT_FALSE (cfg0->bvi))
2755             {
2756               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2757               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2758               goto next_l2_feature;
2759             }
2760
2761           /* Send ARP/ND reply back out input interface through l2-output */
2762           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2763           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2764           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2765                                            to_next, n_left_to_next, pi0,
2766                                            next0);
2767           continue;
2768
2769         check_ip6_nd:
2770           /* IP6 ND event notification or solicitation handling to generate
2771              local response instead of flooding */
2772           iph0 = (ip6_header_t *) l3h0;
2773           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2774                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2775                              !ip6_address_is_unspecified
2776                              (&iph0->src_address)))
2777             {
2778               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2779               if (vnet_ip6_nd_term
2780                   (vm, node, p0, eth0, iph0, sw_if_index0,
2781                    vnet_buffer (p0)->l2.bd_index))
2782                 goto output_response;
2783             }
2784
2785         next_l2_feature:
2786           {
2787             next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
2788                                           L2INPUT_FEAT_ARP_TERM);
2789             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2790                                              to_next, n_left_to_next,
2791                                              pi0, next0);
2792             continue;
2793           }
2794
2795         drop:
2796           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2797               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2798                arp0->ip4_over_ethernet[1].ip4.as_u32))
2799             {
2800               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2801             }
2802           next0 = ARP_TERM_NEXT_DROP;
2803           p0->error = node->errors[error0];
2804
2805           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2806                                            to_next, n_left_to_next, pi0,
2807                                            next0);
2808         }
2809
2810       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2811     }
2812
2813   vlib_error_count (vm, node->node_index,
2814                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2815   return frame->n_vectors;
2816 }
2817
2818 /* *INDENT-OFF* */
2819 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2820   .function = arp_term_l2bd,
2821   .name = "arp-term-l2bd",
2822   .vector_size = sizeof (u32),
2823   .n_errors = ETHERNET_ARP_N_ERROR,
2824   .error_strings = ethernet_arp_error_strings,
2825   .n_next_nodes = ARP_TERM_N_NEXT,
2826   .next_nodes = {
2827     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2828     [ARP_TERM_NEXT_DROP] = "error-drop",
2829   },
2830   .format_buffer = format_ethernet_arp_header,
2831   .format_trace = format_arp_term_input_trace,
2832 };
2833 /* *INDENT-ON* */
2834
2835 clib_error_t *
2836 arp_term_init (vlib_main_t * vm)
2837 {
2838   // Initialize the feature next-node indexes
2839   feat_bitmap_init_next_nodes (vm,
2840                                arp_term_l2bd_node.index,
2841                                L2INPUT_N_FEAT,
2842                                l2input_get_feat_names (),
2843                                arp_term_next_node_index);
2844   return 0;
2845 }
2846
2847 VLIB_INIT_FUNCTION (arp_term_init);
2848
2849 void
2850 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2851 {
2852   if (e->sw_if_index == sw_if_index)
2853     {
2854       adj_nbr_walk_nh4 (e->sw_if_index,
2855                         &e->ip4_address, arp_mk_complete_walk, e);
2856     }
2857 }
2858
2859 void
2860 ethernet_arp_change_mac (u32 sw_if_index)
2861 {
2862   ethernet_arp_main_t *am = &ethernet_arp_main;
2863   ethernet_arp_ip4_entry_t *e;
2864   adj_index_t ai;
2865
2866   /* *INDENT-OFF* */
2867   pool_foreach (e, am->ip4_entry_pool,
2868   ({
2869     change_arp_mac (sw_if_index, e);
2870   }));
2871   /* *INDENT-ON* */
2872
2873   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
2874
2875   if (ADJ_INDEX_INVALID != ai)
2876     adj_glean_update_rewrite (ai);
2877 }
2878
2879 void
2880 send_ip4_garp (vlib_main_t * vm, u32 sw_if_index)
2881 {
2882   ip4_main_t *i4m = &ip4_main;
2883   ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
2884
2885   send_ip4_garp_w_addr (vm, ip4_addr, sw_if_index);
2886 }
2887
2888 void
2889 send_ip4_garp_w_addr (vlib_main_t * vm,
2890                       const ip4_address_t * ip4_addr, u32 sw_if_index)
2891 {
2892   ip4_main_t *i4m = &ip4_main;
2893   vnet_main_t *vnm = vnet_get_main ();
2894   u8 *rewrite, rewrite_len;
2895   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2896
2897   if (ip4_addr)
2898     {
2899       clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
2900                     format_ip4_address, ip4_addr, sw_if_index);
2901
2902       /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
2903          where the interface IP/MAC pair is used for both source and request
2904          MAC/IP pairs in the request */
2905       u32 bi = 0;
2906       ethernet_arp_header_t *h = vlib_packet_template_get_packet
2907         (vm, &i4m->ip4_arp_request_packet_template, &bi);
2908
2909       if (!h)
2910         return;
2911
2912       mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2913       mac_address_from_bytes (&h->ip4_over_ethernet[1].mac, hi->hw_address);
2914       h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
2915       h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
2916
2917       /* Setup MAC header with ARP Etype and broadcast DMAC */
2918       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
2919       rewrite =
2920         ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_ARP,
2921                                 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
2922       rewrite_len = vec_len (rewrite);
2923       vlib_buffer_advance (b, -rewrite_len);
2924       ethernet_header_t *e = vlib_buffer_get_current (b);
2925       clib_memcpy_fast (e->dst_address, rewrite, rewrite_len);
2926       vec_free (rewrite);
2927
2928       /* Send GARP packet out the specified interface */
2929       vnet_buffer (b)->sw_if_index[VLIB_RX] =
2930         vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2931       vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2932       u32 *to_next = vlib_frame_vector_args (f);
2933       to_next[0] = bi;
2934       f->n_vectors = 1;
2935       vlib_put_frame_to_node (vm, hi->output_node_index, f);
2936     }
2937 }
2938
2939 /*
2940  * Remove any arp entries associated with the specified interface
2941  */
2942 static clib_error_t *
2943 vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
2944 {
2945   if (!is_add && sw_if_index != ~0)
2946     {
2947       ethernet_arp_main_t *am = &ethernet_arp_main;
2948       ethernet_arp_ip4_entry_t *e;
2949       /* *INDENT-OFF* */
2950       pool_foreach (e, am->ip4_entry_pool, ({
2951         if (e->sw_if_index != sw_if_index)
2952           continue;
2953         vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
2954           .sw_if_index = sw_if_index,
2955           .ip4 = e->ip4_address,
2956         };
2957         vnet_arp_unset_ip4_over_ethernet_internal (vnm, &args);
2958       }));
2959       /* *INDENT-ON* */
2960     }
2961
2962   return (NULL);
2963 }
2964
2965 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_delete_sw_interface);
2966
2967 /*
2968  * fd.io coding-style-patch-verification: ON
2969  *
2970  * Local Variables:
2971  * eval: (c-set-style "gnu")
2972  * End:
2973  */