ARP unnumbered called when no interfaces are unnumbered
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip6.h>
20 #include <vnet/ethernet/ethernet.h>
21 #include <vnet/ethernet/arp_packet.h>
22 #include <vnet/l2/l2_input.h>
23 #include <vppinfra/mhash.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/adj/adj_nbr.h>
26 #include <vnet/mpls/mpls.h>
27
28 /**
29  * @file
30  * @brief IPv4 ARP.
31  *
32  * This file contains code to manage the IPv4 ARP tables (IP Address
33  * to MAC Address lookup).
34  */
35
36
/*
 * Prototype declared locally; the definition is not in this file.
 * NOTE(review): presumably asks the main thread to invoke fp with
 * data_length bytes copied from data -- confirm against the API code.
 */
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
38
/**
 * @brief One IPv4 -> MAC binding held in the ARP cache.
 *
 * Entries live in ethernet_arp_main_t.ip4_entry_pool and are referenced by
 * pool index from the per-interface hash table.
 */
typedef struct
{
  /** Software interface the binding was learned/configured on */
  u32 sw_if_index;
  /** The IPv4 address being resolved (also the per-interface hash key) */
  ip4_address_t ip4_address;

  /** The MAC address the IPv4 address resolves to */
  u8 ethernet_address[6];

  /** Bitmask of the ETHERNET_ARP_IP4_ENTRY_FLAG_* values below */
  u16 flags;
#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC  (1 << 0)
#define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1)

  /** CPU clock sample (clib_cpu_time_now) taken at the last create/update */
  u64 cpu_time_last_updated;

  /**
   * The index of the adj-fib entry created
   */
  fib_node_index_t fib_entry_index;
} ethernet_arp_ip4_entry_t;
57
/**
 * @brief Per-interface ARP configuration and state
 */
typedef struct ethernet_arp_interface_t_
{
  /**
   * Hash table of ARP entries.
   * Since this hash table is per-interface, the key is only the IPv4 address
   * (as a u32). The value is the entry's index in
   * ethernet_arp_main_t.ip4_entry_pool. The table is created lazily, so it
   * is NULL until the first entry is added on the interface.
   */
  uword *arp_entries;
} ethernet_arp_interface_t;
69
/**
 * @brief One proxy-ARP range (element of ethernet_arp_main_t.proxy_arps).
 *
 * NOTE(review): no user of this type is visible in this part of the file;
 * the field meanings below are read off the names -- confirm.
 */
typedef struct
{
  u32 lo_addr;			/* presumably first address of the range */
  u32 hi_addr;			/* presumably last address of the range */
  u32 fib_index;		/* presumably the FIB the range applies to */
} ethernet_proxy_arp_t;
76
/**
 * @brief A client waiting for an ARP resolution or a MAC-change event.
 *
 * Records for the same IPv4 address form a singly linked list threaded
 * through next_index; the list head is stored in a by-address hash.
 */
typedef struct
{
  /** Pool index of the next record for the same address, ~0 at list end */
  u32 next_index;
  /** Process node to signal via vlib_process_signal_event */
  uword node_index;
  /** Event type passed to vlib_process_signal_event */
  uword type_opaque;
  /** Event data passed to the signal / to the data callback */
  uword data;
  /* Used for arp event notification only */
  void *data_callback;
  /** Client identifier, matched on when a MAC-change record is deleted */
  u32 pid;
} pending_resolution_t;
87
/**
 * @brief All ARP state kept by this file (single instance, see below).
 */
typedef struct
{
  /* Hash tables mapping name to opcode. */
  uword *opcode_by_name;

  /*
   * Clients waiting for an address to resolve ("glean" adjacency handling).
   * The hash maps an IPv4 address (u32) to the pool index of the first
   * pending_resolution_t for that address.
   */
  uword *pending_resolutions_by_address;
  pending_resolution_t *pending_resolutions;

  /*
   * Mac address change notification; same hash-of-list-heads layout as the
   * pending resolutions above.
   */
  uword *mac_changes_by_address;
  pending_resolution_t *mac_changes;

  /* Pool holding every ARP cache entry, for all interfaces */
  ethernet_arp_ip4_entry_t *ip4_entry_pool;

  /* ARP attack mitigation */
  /* Pool index of the entry most recently picked by unset_random_arp_entry */
  u32 arp_delete_rotor;
  /* NOTE(review): presumably the maximum cache size; not read in the
     visible part of the file -- confirm */
  u32 limit_arp_cache_size;

  /** Per interface state (vector indexed by sw_if_index) */
  ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;

  /* Proxy arp vector */
  ethernet_proxy_arp_t *proxy_arps;
} ethernet_arp_main_t;
113
/* The one instance of the ARP state; functions in this file take its
   address directly rather than receiving it as a parameter. */
static ethernet_arp_main_t ethernet_arp_main;
115
/**
 * @brief Argument bundle for setting an ARP entry.
 *
 * Consumed by vnet_arp_set_ip4_over_ethernet_internal and by the RPC
 * callback declared below.
 */
typedef struct
{
  /** Interface the entry belongs to */
  u32 sw_if_index;
  /** The IPv4 address / MAC address pair */
  ethernet_arp_ip4_over_ethernet_address_t a;
  /** Non-zero to create/update the entry as a static one */
  int is_static;
  /** Bitmask of ETHERNET_ARP_ARGS_* values.
   *  NOTE(review): not read in the visible part of the file -- the
   *  REMOVE/FLUSH/POPULATE meanings are taken from the names. */
  int flags;
#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
#define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
#define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
126
/* Forward declaration; the definition appears later in the file. */
static void
set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                    * a);
130
131 static u8 *
132 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
133 {
134   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
135   char *t = 0;
136   switch (h)
137     {
138 #define _(n,f) case n: t = #f; break;
139       foreach_ethernet_arp_hardware_type;
140 #undef _
141
142     default:
143       return format (s, "unknown 0x%x", h);
144     }
145
146   return format (s, "%s", t);
147 }
148
149 static u8 *
150 format_ethernet_arp_opcode (u8 * s, va_list * va)
151 {
152   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
153   char *t = 0;
154   switch (o)
155     {
156 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
157       foreach_ethernet_arp_opcode;
158 #undef _
159
160     default:
161       return format (s, "unknown 0x%x", o);
162     }
163
164   return format (s, "%s", t);
165 }
166
167 static uword
168 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
169                                               va_list * args)
170 {
171   int *result = va_arg (*args, int *);
172   ethernet_arp_main_t *am = &ethernet_arp_main;
173   int x, i;
174
175   /* Numeric opcode. */
176   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
177     {
178       if (x >= (1 << 16))
179         return 0;
180       *result = x;
181       return 1;
182     }
183
184   /* Named type. */
185   if (unformat_user (input, unformat_vlib_number_by_name,
186                      am->opcode_by_name, &i))
187     {
188       *result = i;
189       return 1;
190     }
191
192   return 0;
193 }
194
195 static uword
196 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
197                                              va_list * args)
198 {
199   int *result = va_arg (*args, int *);
200   if (!unformat_user
201       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
202     return 0;
203
204   *result = clib_host_to_net_u16 ((u16) * result);
205   return 1;
206 }
207
208 static u8 *
209 format_ethernet_arp_header (u8 * s, va_list * va)
210 {
211   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
212   u32 max_header_bytes = va_arg (*va, u32);
213   uword indent;
214   u16 l2_type, l3_type;
215
216   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
217     return format (s, "ARP header truncated");
218
219   l2_type = clib_net_to_host_u16 (a->l2_type);
220   l3_type = clib_net_to_host_u16 (a->l3_type);
221
222   indent = format_get_indent (s);
223
224   s = format (s, "%U, type %U/%U, address size %d/%d",
225               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
226               format_ethernet_arp_hardware_type, l2_type,
227               format_ethernet_type, l3_type,
228               a->n_l2_address_bytes, a->n_l3_address_bytes);
229
230   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
231       && l3_type == ETHERNET_TYPE_IP4)
232     {
233       s = format (s, "\n%U%U/%U -> %U/%U",
234                   format_white_space, indent,
235                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
236                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
237                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
238                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
239     }
240   else
241     {
242       uword n2 = a->n_l2_address_bytes;
243       uword n3 = a->n_l3_address_bytes;
244       s = format (s, "\n%U%U/%U -> %U/%U",
245                   format_white_space, indent,
246                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
247                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
248                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
249                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
250     }
251
252   return s;
253 }
254
255 static u8 *
256 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
257 {
258   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
259   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
260   vnet_sw_interface_t *si;
261   u8 *flags = 0;
262
263   if (!e)
264     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
265                    "Flags", "Ethernet", "Interface");
266
267   si = vnet_get_sw_interface (vnm, e->sw_if_index);
268
269   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
270     flags = format (flags, "S");
271
272   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
273     flags = format (flags, "D");
274
275   s = format (s, "%=12U%=16U%=6s%=20U%=24U",
276               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
277               format_ip4_address, &e->ip4_address,
278               flags ? (char *) flags : "",
279               format_ethernet_address, e->ethernet_address,
280               format_vnet_sw_interface_name, vnm, si);
281
282   vec_free (flags);
283   return s;
284 }
285
/**
 * @brief Per-packet trace record for the ARP nodes.
 *
 * Holds the first 64 bytes of the traced packet; the trace formatters below
 * hand them to a header formatter.
 */
typedef struct
{
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
290
291 static u8 *
292 format_ethernet_arp_input_trace (u8 * s, va_list * va)
293 {
294   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
295   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
296   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
297
298   s = format (s, "%U",
299               format_ethernet_arp_header,
300               t->packet_data, sizeof (t->packet_data));
301
302   return s;
303 }
304
305 static u8 *
306 format_arp_term_input_trace (u8 * s, va_list * va)
307 {
308   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
309   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
310   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
311
312   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
313      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
314      - for ip6, the first nibble has value of 6. */
315   s = format (s, "%U", t->packet_data[0] == 0 ?
316               format_ethernet_arp_header : format_ip6_header,
317               t->packet_data, sizeof (t->packet_data));
318
319   return s;
320 }
321
322 static void
323 arp_nbr_probe (ip_adjacency_t * adj)
324 {
325   vnet_main_t *vnm = vnet_get_main ();
326   ip4_main_t *im = &ip4_main;
327   ip_interface_address_t *ia;
328   ethernet_arp_header_t *h;
329   vnet_hw_interface_t *hi;
330   vnet_sw_interface_t *si;
331   ip4_address_t *src;
332   vlib_buffer_t *b;
333   vlib_main_t *vm;
334   u32 bi = 0;
335
336   vm = vlib_get_main ();
337
338   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
339
340   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
341     {
342       return;
343     }
344
345   src =
346     ip4_interface_address_matching_destination (im,
347                                                 &adj->sub_type.nbr.next_hop.
348                                                 ip4,
349                                                 adj->rewrite_header.
350                                                 sw_if_index, &ia);
351   if (!src)
352     {
353       return;
354     }
355
356   h =
357     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
358                                      &bi);
359
360   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
361
362   clib_memcpy (h->ip4_over_ethernet[0].ethernet,
363                hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
364
365   h->ip4_over_ethernet[0].ip4 = src[0];
366   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
367
368   b = vlib_get_buffer (vm, bi);
369   vnet_buffer (b)->sw_if_index[VLIB_RX] =
370     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
371
372   /* Add encapsulation string for software interface (e.g. ethernet header). */
373   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
374   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
375
376   {
377     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
378     u32 *to_next = vlib_frame_vector_args (f);
379     to_next[0] = bi;
380     f->n_vectors = 1;
381     vlib_put_frame_to_node (vm, hi->output_node_index, f);
382   }
383 }
384
385 static void
386 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
387 {
388   adj_nbr_update_rewrite
389     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
390      ethernet_build_rewrite (vnet_get_main (),
391                              e->sw_if_index,
392                              adj_get_link_type (ai), e->ethernet_address));
393 }
394
395 static void
396 arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
397 {
398   adj_nbr_update_rewrite
399     (ai,
400      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
401      ethernet_build_rewrite (vnet_get_main (),
402                              e->sw_if_index,
403                              VNET_LINK_ARP,
404                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
405 }
406
407 static ethernet_arp_ip4_entry_t *
408 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
409 {
410   ethernet_arp_main_t *am = &ethernet_arp_main;
411   ethernet_arp_ip4_entry_t *e = NULL;
412   uword *p;
413
414   if (NULL != eai->arp_entries)
415     {
416       p = hash_get (eai->arp_entries, addr->as_u32);
417       if (!p)
418         return (NULL);
419
420       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
421     }
422
423   return (e);
424 }
425
426 static adj_walk_rc_t
427 arp_mk_complete_walk (adj_index_t ai, void *ctx)
428 {
429   ethernet_arp_ip4_entry_t *e = ctx;
430
431   arp_mk_complete (ai, e);
432
433   return (ADJ_WALK_RC_CONTINUE);
434 }
435
436 static adj_walk_rc_t
437 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
438 {
439   ethernet_arp_ip4_entry_t *e = ctx;
440
441   arp_mk_incomplete (ai, e);
442
443   return (ADJ_WALK_RC_CONTINUE);
444 }
445
446 void
447 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
448 {
449   ethernet_arp_main_t *am = &ethernet_arp_main;
450   ethernet_arp_interface_t *arp_int;
451   ethernet_arp_ip4_entry_t *e;
452   ip_adjacency_t *adj;
453
454   adj = adj_get (ai);
455
456   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
457   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
458   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
459
460   if (NULL != e)
461     {
462       adj_nbr_walk_nh4 (sw_if_index,
463                         &e->ip4_address, arp_mk_complete_walk, e);
464     }
465   else
466     {
467       /*
468        * no matching ARP entry.
469        * construct the rewire required to for an ARP packet, and stick
470        * that in the adj's pipe to smoke.
471        */
472       adj_nbr_update_rewrite (ai,
473                               ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
474                               ethernet_build_rewrite (vnm,
475                                                       sw_if_index,
476                                                       VNET_LINK_ARP,
477                                                       VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
478
479       /*
480        * since the FIB has added this adj for a route, it makes sense it may
481        * want to forward traffic sometime soon. Let's send a speculative ARP.
482        * just one. If we were to do periodically that wouldn't be bad either,
483        * but that's more code than i'm prepared to write at this time for
484        * relatively little reward.
485        */
486       arp_nbr_probe (adj);
487     }
488 }
489
490 int
491 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
492                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
493                                          * args)
494 {
495   ethernet_arp_ip4_entry_t *e = 0;
496   ethernet_arp_main_t *am = &ethernet_arp_main;
497   ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
498   vlib_main_t *vm = vlib_get_main ();
499   int make_new_arp_cache_entry = 1;
500   uword *p;
501   pending_resolution_t *pr, *mc;
502   ethernet_arp_interface_t *arp_int;
503   int is_static = args->is_static;
504   u32 sw_if_index = args->sw_if_index;
505
506   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
507
508   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
509
510   if (NULL != arp_int->arp_entries)
511     {
512       p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
513       if (p)
514         {
515           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
516
517           /* Refuse to over-write static arp. */
518           if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
519             return -2;
520           make_new_arp_cache_entry = 0;
521         }
522     }
523
524   if (make_new_arp_cache_entry)
525     {
526       fib_prefix_t pfx = {
527         .fp_len = 32,
528         .fp_proto = FIB_PROTOCOL_IP4,
529         .fp_addr = {
530                     .ip4 = a->ip4,
531                     }
532         ,
533       };
534       u32 fib_index;
535
536       pool_get (am->ip4_entry_pool, e);
537
538       if (NULL == arp_int->arp_entries)
539         {
540           arp_int->arp_entries = hash_create (0, sizeof (u32));
541         }
542
543       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
544
545       e->sw_if_index = sw_if_index;
546       e->ip4_address = a->ip4;
547       clib_memcpy (e->ethernet_address,
548                    a->ethernet, sizeof (e->ethernet_address));
549
550       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
551       e->fib_entry_index =
552         fib_table_entry_update_one_path (fib_index,
553                                          &pfx,
554                                          FIB_SOURCE_ADJ,
555                                          FIB_ENTRY_FLAG_ATTACHED,
556                                          FIB_PROTOCOL_IP4,
557                                          &pfx.fp_addr,
558                                          e->sw_if_index,
559                                          ~0,
560                                          1,
561                                          MPLS_LABEL_INVALID,
562                                          FIB_ROUTE_PATH_FLAG_NONE);
563     }
564   else
565     {
566       /*
567        * prevent a DoS attack from the data-plane that
568        * spams us with no-op updates to the MAC address
569        */
570       if (0 == memcmp (e->ethernet_address,
571                        a->ethernet, sizeof (e->ethernet_address)))
572         return -1;
573
574       /* Update time stamp and ethernet address. */
575       clib_memcpy (e->ethernet_address, a->ethernet,
576                    sizeof (e->ethernet_address));
577     }
578
579   e->cpu_time_last_updated = clib_cpu_time_now ();
580   if (is_static)
581     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
582   else
583     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
584
585   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
586
587   /* Customer(s) waiting for this address to be resolved? */
588   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
589   if (p)
590     {
591       u32 next_index;
592       next_index = p[0];
593
594       while (next_index != (u32) ~ 0)
595         {
596           pr = pool_elt_at_index (am->pending_resolutions, next_index);
597           vlib_process_signal_event (vm, pr->node_index,
598                                      pr->type_opaque, pr->data);
599           next_index = pr->next_index;
600           pool_put (am->pending_resolutions, pr);
601         }
602
603       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
604     }
605
606   /* Customer(s) requesting ARP event for this address? */
607   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
608   if (p)
609     {
610       u32 next_index;
611       next_index = p[0];
612
613       while (next_index != (u32) ~ 0)
614         {
615           int (*fp) (u32, u8 *, u32, u32);
616           int rv = 1;
617           mc = pool_elt_at_index (am->mac_changes, next_index);
618           fp = mc->data_callback;
619
620           /* Call the user's data callback, return 1 to suppress dup events */
621           if (fp)
622             rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
623
624           /*
625            * Signal the resolver process, as long as the user
626            * says they want to be notified
627            */
628           if (rv == 0)
629             vlib_process_signal_event (vm, mc->node_index,
630                                        mc->type_opaque, mc->data);
631           next_index = mc->next_index;
632         }
633     }
634
635   return 0;
636 }
637
638 void
639 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
640                                         void *address_arg,
641                                         uword node_index,
642                                         uword type_opaque, uword data)
643 {
644   ethernet_arp_main_t *am = &ethernet_arp_main;
645   ip4_address_t *address = address_arg;
646   uword *p;
647   pending_resolution_t *pr;
648
649   pool_get (am->pending_resolutions, pr);
650
651   pr->next_index = ~0;
652   pr->node_index = node_index;
653   pr->type_opaque = type_opaque;
654   pr->data = data;
655   pr->data_callback = 0;
656
657   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
658   if (p)
659     {
660       /* Insert new resolution at the head of the list */
661       pr->next_index = p[0];
662       hash_unset (am->pending_resolutions_by_address, address->as_u32);
663     }
664
665   hash_set (am->pending_resolutions_by_address, address->as_u32,
666             pr - am->pending_resolutions);
667 }
668
669 int
670 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
671                                    void *data_callback,
672                                    u32 pid,
673                                    void *address_arg,
674                                    uword node_index,
675                                    uword type_opaque, uword data, int is_add)
676 {
677   ethernet_arp_main_t *am = &ethernet_arp_main;
678   ip4_address_t *address = address_arg;
679   uword *p;
680   pending_resolution_t *mc;
681   void (*fp) (u32, u8 *) = data_callback;
682
683   if (is_add)
684     {
685       pool_get (am->mac_changes, mc);
686
687       mc->next_index = ~0;
688       mc->node_index = node_index;
689       mc->type_opaque = type_opaque;
690       mc->data = data;
691       mc->data_callback = data_callback;
692       mc->pid = pid;
693
694       p = hash_get (am->mac_changes_by_address, address->as_u32);
695       if (p)
696         {
697           /* Insert new resolution at the head of the list */
698           mc->next_index = p[0];
699           hash_unset (am->mac_changes_by_address, address->as_u32);
700         }
701
702       hash_set (am->mac_changes_by_address, address->as_u32,
703                 mc - am->mac_changes);
704       return 0;
705     }
706   else
707     {
708       u32 index;
709       pending_resolution_t *mc_last = 0;
710
711       p = hash_get (am->mac_changes_by_address, address->as_u32);
712       if (p == 0)
713         return VNET_API_ERROR_NO_SUCH_ENTRY;
714
715       index = p[0];
716
717       while (index != (u32) ~ 0)
718         {
719           mc = pool_elt_at_index (am->mac_changes, index);
720           if (mc->node_index == node_index &&
721               mc->type_opaque == type_opaque && mc->pid == pid)
722             {
723               /* Clients may need to clean up pool entries, too */
724               if (fp)
725                 (*fp) (mc->data, 0 /* no new mac addrs */ );
726               if (index == p[0])
727                 {
728                   hash_unset (am->mac_changes_by_address, address->as_u32);
729                   if (mc->next_index != ~0)
730                     hash_set (am->mac_changes_by_address, address->as_u32,
731                               mc->next_index);
732                   pool_put (am->mac_changes, mc);
733                   return 0;
734                 }
735               else
736                 {
737                   ASSERT (mc_last);
738                   mc_last->next_index = mc->next_index;
739                   pool_put (am->mac_changes, mc);
740                   return 0;
741                 }
742             }
743           mc_last = mc;
744           index = mc->next_index;
745         }
746
747       return VNET_API_ERROR_NO_SUCH_ENTRY;
748     }
749 }
750
/*
 * Next-node choices for the arp-input node:
 * either we drop the packet or we send a reply to the sender.
 * ARP_INPUT_N_NEXT is the number of choices, not a choice itself.
 */
typedef enum
{
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_NEXT_REPLY_TX,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
758
/*
 * X-macro listing the ARP error/counter reasons as (symbol, description)
 * pairs. The enum below expands it once to generate the
 * ETHERNET_ARP_ERROR_<symbol> values.
 */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table") \

/* One enumerator per entry above; ETHERNET_ARP_N_ERROR is the count. */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
    ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
782
783
784 static void
785 unset_random_arp_entry (void)
786 {
787   ethernet_arp_main_t *am = &ethernet_arp_main;
788   ethernet_arp_ip4_entry_t *e;
789   vnet_main_t *vnm = vnet_get_main ();
790   ethernet_arp_ip4_over_ethernet_address_t delme;
791   u32 index;
792
793   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
794   am->arp_delete_rotor = index;
795
796   /* Try again from elt 0, could happen if an intfc goes down */
797   if (index == ~0)
798     {
799       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
800       am->arp_delete_rotor = index;
801     }
802
803   /* Nothing left in the pool */
804   if (index == ~0)
805     return;
806
807   e = pool_elt_at_index (am->ip4_entry_pool, index);
808
809   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
810   delme.ip4.as_u32 = e->ip4_address.as_u32;
811
812   vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
813 }
814
815 static int
816 arp_unnumbered (vlib_buffer_t * p0,
817                 u32 pi0, ethernet_header_t * eth0, u32 sw_if_index)
818 {
819   vlib_main_t *vm = vlib_get_main ();
820   vnet_main_t *vnm = vnet_get_main ();
821   vnet_interface_main_t *vim = &vnm->interface_main;
822   vnet_sw_interface_t *si;
823   vnet_hw_interface_t *hi;
824   u32 unnum_src_sw_if_index;
825   u32 *broadcast_swifs = 0;
826   u32 *buffers = 0;
827   u32 n_alloc = 0;
828   vlib_buffer_t *b0;
829   int i;
830   u8 dst_mac_address[6];
831   i16 header_size;
832   ethernet_arp_header_t *arp0;
833
834   /* Save the dst mac address */
835   clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
836
837   /* Figure out which sw_if_index supplied the address */
838   unnum_src_sw_if_index = sw_if_index;
839
840   /* Track down all users of the unnumbered source */
841   /* *INDENT-OFF* */
842   pool_foreach (si, vim->sw_interfaces,
843   ({
844     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
845         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
846       {
847         vec_add1 (broadcast_swifs, si->sw_if_index);
848       }
849   }));
850   /* *INDENT-ON* */
851
852   /* If there are no interfaces un-unmbered to this interface,
853      we are done  here. */
854   if (0 == vec_len (broadcast_swifs))
855     return 0;
856
857   /* Allocate buffering if we need it */
858   if (vec_len (broadcast_swifs) > 1)
859     {
860       vec_validate (buffers, vec_len (broadcast_swifs) - 2);
861       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len (buffers));
862       _vec_len (buffers) = n_alloc;
863       for (i = 0; i < n_alloc; i++)
864         {
865           b0 = vlib_get_buffer (vm, buffers[i]);
866
867           /* xerox (partially built) ARP pkt */
868           clib_memcpy (b0->data, p0->data,
869                        p0->current_length + p0->current_data);
870           b0->current_data = p0->current_data;
871           b0->current_length = p0->current_length;
872           vnet_buffer (b0)->sw_if_index[VLIB_RX] =
873             vnet_buffer (p0)->sw_if_index[VLIB_RX];
874         }
875     }
876
877   vec_insert (buffers, 1, 0);
878   buffers[0] = pi0;
879
880   for (i = 0; i < vec_len (buffers); i++)
881     {
882       b0 = vlib_get_buffer (vm, buffers[i]);
883       arp0 = vlib_buffer_get_current (b0);
884
885       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
886       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
887
888       /* For decoration, most likely */
889       vnet_buffer (b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
890
891       /* Fix ARP pkt src address */
892       clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
893
894       /* Build L2 encaps for this swif */
895       header_size = sizeof (ethernet_header_t);
896       if (si->sub.eth.flags.one_tag)
897         header_size += 4;
898       else if (si->sub.eth.flags.two_tags)
899         header_size += 8;
900
901       vlib_buffer_advance (b0, -header_size);
902       eth0 = vlib_buffer_get_current (b0);
903
904       if (si->sub.eth.flags.one_tag)
905         {
906           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
907
908           eth0->type = si->sub.eth.flags.dot1ad ?
909             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
910             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
911           outer->priority_cfi_and_id =
912             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
913           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
914
915         }
916       else if (si->sub.eth.flags.two_tags)
917         {
918           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
919           ethernet_vlan_header_t *inner = (void *) (outer + 1);
920
921           eth0->type = si->sub.eth.flags.dot1ad ?
922             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
923             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
924           outer->priority_cfi_and_id =
925             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
926           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
927           inner->priority_cfi_and_id =
928             clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
929           inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
930
931         }
932       else
933         {
934           eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
935         }
936
937       /* Restore the original dst address, set src address */
938       clib_memcpy (eth0->dst_address, dst_mac_address,
939                    sizeof (eth0->dst_address));
940       clib_memcpy (eth0->src_address, hi->hw_address,
941                    sizeof (eth0->src_address));
942
943       /* Transmit replicas */
944       if (i > 0)
945         {
946           vlib_frame_t *f =
947             vlib_get_frame_to_node (vm, hi->output_node_index);
948           u32 *to_next = vlib_frame_vector_args (f);
949           to_next[0] = buffers[i];
950           f->n_vectors = 1;
951           vlib_put_frame_to_node (vm, hi->output_node_index, f);
952         }
953     }
954
955   /* The regular path outputs the original pkt.. */
956   vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
957
958   vec_free (broadcast_swifs);
959   vec_free (buffers);
960
961   return !0;
962 }
963
964 static uword
965 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
966 {
967   ethernet_arp_main_t *am = &ethernet_arp_main;
968   vnet_main_t *vnm = vnet_get_main ();
969   ip4_main_t *im4 = &ip4_main;
970   u32 n_left_from, next_index, *from, *to_next;
971   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
972
973   from = vlib_frame_vector_args (frame);
974   n_left_from = frame->n_vectors;
975   next_index = node->cached_next_index;
976
977   if (node->flags & VLIB_NODE_FLAG_TRACE)
978     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
979                                    /* stride */ 1,
980                                    sizeof (ethernet_arp_input_trace_t));
981
982   while (n_left_from > 0)
983     {
984       u32 n_left_to_next;
985
986       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
987
988       while (n_left_from > 0 && n_left_to_next > 0)
989         {
990           vlib_buffer_t *p0;
991           vnet_hw_interface_t *hw_if0;
992           ethernet_arp_header_t *arp0;
993           ethernet_header_t *eth0;
994           ip_adjacency_t *adj0;
995           ip4_address_t *if_addr0, proxy_src;
996           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
997           u8 is_request0, dst_is_local0, is_unnum0;
998           ethernet_proxy_arp_t *pa;
999           fib_node_index_t dst_fei, src_fei;
1000           fib_prefix_t pfx0;
1001           fib_entry_flag_t src_flags, dst_flags;
1002
1003           pi0 = from[0];
1004           to_next[0] = pi0;
1005           from += 1;
1006           to_next += 1;
1007           n_left_from -= 1;
1008           n_left_to_next -= 1;
1009           pa = 0;
1010
1011           p0 = vlib_get_buffer (vm, pi0);
1012           arp0 = vlib_buffer_get_current (p0);
1013
1014           is_request0 = arp0->opcode
1015             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
1016
1017           error0 = ETHERNET_ARP_ERROR_replies_sent;
1018
1019           error0 =
1020             (arp0->l2_type !=
1021              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1022              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1023           error0 =
1024             (arp0->l3_type !=
1025              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1026              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1027
1028           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1029
1030           if (error0)
1031             goto drop2;
1032
1033           /* Check that IP address is local and matches incoming interface. */
1034           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1035           if (~0 == fib_index0)
1036             {
1037               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1038               goto drop2;
1039
1040             }
1041           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1042                                           &arp0->ip4_over_ethernet[1].ip4,
1043                                           32);
1044           dst_flags = fib_entry_get_flags_for_source (dst_fei,
1045                                                       FIB_SOURCE_INTERFACE);
1046
1047           conn_sw_if_index0 =
1048             fib_entry_get_resolving_interface_for_source (dst_fei,
1049                                                           FIB_SOURCE_INTERFACE);
1050
1051           if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
1052             {
1053               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1054               goto drop1;
1055             }
1056
1057           /* Honor unnumbered interface, if any */
1058           is_unnum0 = sw_if_index0 != conn_sw_if_index0;
1059
1060           /* Source must also be local to subnet of matching interface address. */
1061           src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1062                                           &arp0->ip4_over_ethernet[0].ip4,
1063                                           32);
1064           src_flags = fib_entry_get_flags (src_fei);
1065
1066           if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
1067                 (FIB_ENTRY_FLAG_CONNECTED & src_flags)) ||
1068               sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
1069             {
1070               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
1071               goto drop2;
1072             }
1073
1074           /* Reject requests/replies with our local interface address. */
1075           if (FIB_ENTRY_FLAG_LOCAL & src_flags)
1076             {
1077               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
1078               goto drop2;
1079             }
1080
1081           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
1082           fib_entry_get_prefix (dst_fei, &pfx0);
1083           if_addr0 = &pfx0.fp_addr.ip4;
1084
1085           /* Fill in ethernet header. */
1086           eth0 = ethernet_buffer_get_header (p0);
1087
1088           /* Trash ARP packets whose ARP-level source addresses do not
1089              match their L2-frame-level source addresses */
1090           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1091                       sizeof (eth0->src_address)))
1092             {
1093               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1094               goto drop2;
1095             }
1096
1097           /* Learn or update sender's mapping only for requests or unicasts
1098              that don't match local interface address. */
1099           if (ethernet_address_cast (eth0->dst_address) ==
1100               ETHERNET_ADDRESS_UNICAST || is_request0)
1101             {
1102               if (am->limit_arp_cache_size &&
1103                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
1104                 unset_random_arp_entry ();
1105
1106               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0,
1107                                               &arp0->ip4_over_ethernet[0],
1108                                               0 /* is_static */ );
1109               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
1110             }
1111
1112           /* Only send a reply for requests sent which match a local interface. */
1113           if (!(is_request0 && dst_is_local0))
1114             {
1115               error0 =
1116                 (arp0->opcode ==
1117                  clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) ?
1118                  ETHERNET_ARP_ERROR_replies_received : error0);
1119               goto drop1;
1120             }
1121
1122           /* Send a reply. */
1123         send_reply:
1124           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1125           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1126
1127           /* Send reply back through input interface */
1128           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1129           next0 = ARP_INPUT_NEXT_REPLY_TX;
1130
1131           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1132
1133           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1134
1135           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
1136                        hw_if0->hw_address, 6);
1137           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1138             if_addr0->data_u32;
1139
1140           /* Hardware must be ethernet-like. */
1141           ASSERT (vec_len (hw_if0->hw_address) == 6);
1142
1143           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1144           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
1145
1146           /* Figure out how much to rewind current data from adjacency. */
1147           /* get the adj from the destination's covering connected */
1148           if (NULL == pa)
1149             {
1150               adj0 =
1151                 adj_get (fib_entry_get_adj_for_source
1152                          (ip4_fib_table_lookup
1153                           (ip4_fib_get (fib_index0),
1154                            &arp0->ip4_over_ethernet[1].ip4, 31),
1155                           FIB_SOURCE_INTERFACE));
1156               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1157                 {
1158                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1159                   goto drop2;
1160                 }
1161               if (is_unnum0)
1162                 {
1163                   if (!arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0))
1164                     goto drop2;
1165                 }
1166               else
1167                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1168             }
1169           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1170                                            n_left_to_next, pi0, next0);
1171
1172           n_replies_sent += 1;
1173           continue;
1174
1175         drop1:
1176           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1177               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1178                arp0->ip4_over_ethernet[1].ip4.as_u32))
1179             {
1180               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1181               goto drop2;
1182             }
1183           /* See if proxy arp is configured for the address */
1184           if (is_request0)
1185             {
1186               vnet_sw_interface_t *si;
1187               u32 this_addr = clib_net_to_host_u32
1188                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1189               u32 fib_index0;
1190
1191               si = vnet_get_sw_interface (vnm, sw_if_index0);
1192
1193               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1194                 goto drop2;
1195
1196               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
1197                                     sw_if_index0);
1198
1199               vec_foreach (pa, am->proxy_arps)
1200               {
1201                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1202                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1203
1204                 /* an ARP request hit in the proxy-arp table? */
1205                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1206                     (fib_index0 == pa->fib_index))
1207                   {
1208                     eth0 = ethernet_buffer_get_header (p0);
1209                     proxy_src.as_u32 =
1210                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1211
1212                     /*
1213                      * Rewind buffer, direct code above not to
1214                      * think too hard about it.
1215                      */
1216                     if_addr0 = &proxy_src;
1217                     is_unnum0 = 0;
1218                     i32 ethernet_start =
1219                       vnet_buffer (p0)->ethernet.start_of_ethernet_header;
1220                     i32 rewind = p0->current_data - ethernet_start;
1221                     vlib_buffer_advance (p0, -rewind);
1222                     n_proxy_arp_replies_sent++;
1223                     goto send_reply;
1224                   }
1225               }
1226             }
1227
1228         drop2:
1229
1230           next0 = ARP_INPUT_NEXT_DROP;
1231           p0->error = node->errors[error0];
1232
1233           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1234                                            n_left_to_next, pi0, next0);
1235         }
1236
1237       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1238     }
1239
1240   vlib_error_count (vm, node->node_index,
1241                     ETHERNET_ARP_ERROR_replies_sent,
1242                     n_replies_sent - n_proxy_arp_replies_sent);
1243
1244   vlib_error_count (vm, node->node_index,
1245                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
1246                     n_proxy_arp_replies_sent);
1247   return frame->n_vectors;
1248 }
1249
1250 static char *ethernet_arp_error_strings[] = {
1251 #define _(sym,string) string,
1252   foreach_ethernet_arp_error
1253 #undef _
1254 };
1255
1256 /* *INDENT-OFF* */
1257 VLIB_REGISTER_NODE (arp_input_node, static) =
1258 {
1259   .function = arp_input,
1260   .name = "arp-input",
1261   .vector_size = sizeof (u32),
1262   .n_errors = ETHERNET_ARP_N_ERROR,
1263   .error_strings = ethernet_arp_error_strings,
1264   .n_next_nodes = ARP_INPUT_N_NEXT,
1265   .next_nodes = {
1266     [ARP_INPUT_NEXT_DROP] = "error-drop",
1267     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1268   },
1269   .format_buffer = format_ethernet_arp_header,
1270   .format_trace = format_ethernet_arp_input_trace,
1271 };
1272 /* *INDENT-ON* */
1273
1274 static int
1275 ip4_arp_entry_sort (void *a1, void *a2)
1276 {
1277   ethernet_arp_ip4_entry_t *e1 = a1;
1278   ethernet_arp_ip4_entry_t *e2 = a2;
1279
1280   int cmp;
1281   vnet_main_t *vnm = vnet_get_main ();
1282
1283   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1284   if (!cmp)
1285     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1286   return cmp;
1287 }
1288
1289 static clib_error_t *
1290 show_ip4_arp (vlib_main_t * vm,
1291               unformat_input_t * input, vlib_cli_command_t * cmd)
1292 {
1293   vnet_main_t *vnm = vnet_get_main ();
1294   ethernet_arp_main_t *am = &ethernet_arp_main;
1295   ethernet_arp_ip4_entry_t *e, *es;
1296   ethernet_proxy_arp_t *pa;
1297   clib_error_t *error = 0;
1298   u32 sw_if_index;
1299
1300   /* Filter entries by interface if given. */
1301   sw_if_index = ~0;
1302   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1303
1304   es = 0;
1305   /* *INDENT-OFF* */
1306   pool_foreach (e, am->ip4_entry_pool,
1307   ({
1308     vec_add1 (es, e[0]);
1309   }));
1310   /* *INDENT-ON* */
1311
1312   if (es)
1313     {
1314       vec_sort_with_function (es, ip4_arp_entry_sort);
1315       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1316       vec_foreach (e, es)
1317       {
1318         if (sw_if_index != ~0 && e->sw_if_index != sw_if_index)
1319           continue;
1320         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1321       }
1322       vec_free (es);
1323     }
1324
1325   if (vec_len (am->proxy_arps))
1326     {
1327       vlib_cli_output (vm, "Proxy arps enabled for:");
1328       vec_foreach (pa, am->proxy_arps)
1329       {
1330         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1331                          pa->fib_index,
1332                          format_ip4_address, &pa->lo_addr,
1333                          format_ip4_address, &pa->hi_addr);
1334       }
1335     }
1336
1337   return error;
1338 }
1339
1340 /*?
1341  * Display all the IPv4 ARP entries.
1342  *
1343  * @cliexpar
1344  * Example of how to display the IPv4 ARP table:
1345  * @cliexstart{show ip arp}
1346  *    Time      FIB        IP4       Flags      Ethernet              Interface
1347  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1348  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1349  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1350  * Proxy arps enabled for:
1351  * Fib_index 0   6.0.0.1 - 6.0.0.11
1352  * @cliexend
1353  ?*/
1354 /* *INDENT-OFF* */
1355 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1356   .path = "show ip arp",
1357   .function = show_ip4_arp,
1358   .short_help = "show ip arp",
1359 };
1360 /* *INDENT-ON* */
1361
1362 typedef struct
1363 {
1364   pg_edit_t l2_type, l3_type;
1365   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1366   pg_edit_t opcode;
1367   struct
1368   {
1369     pg_edit_t ethernet;
1370     pg_edit_t ip4;
1371   } ip4_over_ethernet[2];
1372 } pg_ethernet_arp_header_t;
1373
1374 static inline void
1375 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1376 {
1377   /* Initialize fields that are not bit fields in the IP header. */
1378 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1379   _(l2_type);
1380   _(l3_type);
1381   _(n_l2_address_bytes);
1382   _(n_l3_address_bytes);
1383   _(opcode);
1384   _(ip4_over_ethernet[0].ethernet);
1385   _(ip4_over_ethernet[0].ip4);
1386   _(ip4_over_ethernet[1].ethernet);
1387   _(ip4_over_ethernet[1].ip4);
1388 #undef _
1389 }
1390
1391 uword
1392 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1393 {
1394   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1395   pg_ethernet_arp_header_t *p;
1396   u32 group_index;
1397
1398   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1399                             &group_index);
1400   pg_ethernet_arp_header_init (p);
1401
1402   /* Defaults. */
1403   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1404   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1405   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1406   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1407
1408   if (!unformat (input, "%U: %U/%U -> %U/%U",
1409                  unformat_pg_edit,
1410                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1411                  unformat_pg_edit,
1412                  unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1413                  unformat_pg_edit,
1414                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1415                  unformat_pg_edit,
1416                  unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1417                  unformat_pg_edit,
1418                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1419     {
1420       /* Free up any edits we may have added. */
1421       pg_free_edit_group (s);
1422       return 0;
1423     }
1424   return 1;
1425 }
1426
1427 clib_error_t *
1428 ip4_set_arp_limit (u32 arp_limit)
1429 {
1430   ethernet_arp_main_t *am = &ethernet_arp_main;
1431
1432   am->limit_arp_cache_size = arp_limit;
1433   return 0;
1434 }
1435
1436 /**
1437  * @brief Control Plane hook to remove an ARP entry
1438  */
1439 int
1440 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1441                                   u32 sw_if_index, void *a_arg)
1442 {
1443   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1444   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1445
1446   args.sw_if_index = sw_if_index;
1447   args.flags = ETHERNET_ARP_ARGS_REMOVE;
1448   clib_memcpy (&args.a, a, sizeof (*a));
1449
1450   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1451                                (u8 *) & args, sizeof (args));
1452   return 0;
1453 }
1454
1455 /**
1456  * @brief Internally generated event to flush the ARP cache on an
1457  * interface state change event.
1458  * A flush will remove dynamic ARP entries, and for statics remove the MAC
1459  * address from the corresponding adjacencies.
1460  */
1461 static int
1462 vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
1463                                   u32 sw_if_index, void *a_arg)
1464 {
1465   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1466   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1467
1468   args.sw_if_index = sw_if_index;
1469   args.flags = ETHERNET_ARP_ARGS_FLUSH;
1470   clib_memcpy (&args.a, a, sizeof (*a));
1471
1472   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1473                                (u8 *) & args, sizeof (args));
1474   return 0;
1475 }
1476
1477 /**
1478  * @brief Internally generated event to populate the ARP cache on an
1479  * interface state change event.
1480  * For static entries this will re-source the adjacencies.
1481  *
1482  * @param sw_if_index The interface on which the ARP entires are acted
1483  */
1484 static int
1485 vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
1486                                      u32 sw_if_index, void *a_arg)
1487 {
1488   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1489   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1490
1491   args.sw_if_index = sw_if_index;
1492   args.flags = ETHERNET_ARP_ARGS_POPULATE;
1493   clib_memcpy (&args.a, a, sizeof (*a));
1494
1495   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1496                                (u8 *) & args, sizeof (args));
1497   return 0;
1498 }
1499
1500 /*
1501  * arp_add_del_interface_address
1502  *
1503  * callback when an interface address is added or deleted
1504  */
1505 static void
1506 arp_add_del_interface_address (ip4_main_t * im,
1507                                uword opaque,
1508                                u32 sw_if_index,
1509                                ip4_address_t * address,
1510                                u32 address_length,
1511                                u32 if_address_index, u32 is_del)
1512 {
1513   /*
1514    * Flush the ARP cache of all entries covered by the address
1515    * that is being removed.
1516    */
1517   ethernet_arp_main_t *am = &ethernet_arp_main;
1518   ethernet_arp_ip4_entry_t *e;
1519
1520   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
1521     return;
1522
1523   if (is_del)
1524     {
1525       ethernet_arp_interface_t *eai;
1526       u32 i, *to_delete = 0;
1527       hash_pair_t *pair;
1528
1529       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
1530
1531       /* *INDENT-OFF* */
1532       hash_foreach_pair (pair, eai->arp_entries,
1533       ({
1534         e = pool_elt_at_index(am->ip4_entry_pool,
1535                               pair->value[0]);
1536         if (ip4_destination_matches_route (im, &e->ip4_address,
1537                                            address, address_length))
1538           {
1539             vec_add1 (to_delete, e - am->ip4_entry_pool);
1540           }
1541       }));
1542       /* *INDENT-ON* */
1543
1544       for (i = 0; i < vec_len (to_delete); i++)
1545         {
1546           ethernet_arp_ip4_over_ethernet_address_t delme;
1547           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1548
1549           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1550           delme.ip4.as_u32 = e->ip4_address.as_u32;
1551
1552           vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
1553                                             e->sw_if_index, &delme);
1554         }
1555
1556       vec_free (to_delete);
1557     }
1558 }
1559
1560 static clib_error_t *
1561 ethernet_arp_init (vlib_main_t * vm)
1562 {
1563   ethernet_arp_main_t *am = &ethernet_arp_main;
1564   ip4_main_t *im = &ip4_main;
1565   clib_error_t *error;
1566   pg_node_t *pn;
1567
1568   if ((error = vlib_call_init_function (vm, ethernet_init)))
1569     return error;
1570
1571   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1572
1573   pn = pg_get_node (arp_input_node.index);
1574   pn->unformat_edit = unformat_pg_arp_header;
1575
1576   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1577 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1578   foreach_ethernet_arp_opcode;
1579 #undef _
1580
1581   /* $$$ configurable */
1582   am->limit_arp_cache_size = 50000;
1583
1584   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1585   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1586
1587   /* don't trace ARP error packets */
1588   {
1589     vlib_node_runtime_t *rt =
1590       vlib_node_get_runtime (vm, arp_input_node.index);
1591
1592 #define _(a,b)                                  \
1593     vnet_pcap_drop_trace_filter_add_del         \
1594         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1595          1 /* is_add */);
1596     foreach_ethernet_arp_error
1597 #undef _
1598   }
1599
1600   ip4_add_del_interface_address_callback_t cb;
1601   cb.function = arp_add_del_interface_address;
1602   cb.function_opaque = 0;
1603   vec_add1 (im->add_del_interface_address_callbacks, cb);
1604
1605   return 0;
1606 }
1607
1608 VLIB_INIT_FUNCTION (ethernet_arp_init);
1609
1610 static void
1611 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
1612 {
1613   ethernet_arp_main_t *am = &ethernet_arp_main;
1614
1615   fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
1616   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
1617   pool_put (am->ip4_entry_pool, e);
1618 }
1619
1620 static inline int
1621 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1622                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1623                                            * args)
1624 {
1625   ethernet_arp_main_t *am = &ethernet_arp_main;
1626   ethernet_arp_ip4_entry_t *e;
1627   ethernet_arp_interface_t *eai;
1628
1629   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1630
1631   e = arp_entry_find (eai, &args->a.ip4);
1632
1633   if (NULL != e)
1634     {
1635       adj_nbr_walk_nh4 (e->sw_if_index,
1636                         &e->ip4_address, arp_mk_incomplete_walk, e);
1637       arp_entry_free (eai, e);
1638     }
1639
1640   return 0;
1641 }
1642
1643 static int
1644 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1645                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1646                                            * args)
1647 {
1648   ethernet_arp_main_t *am = &ethernet_arp_main;
1649   ethernet_arp_ip4_entry_t *e;
1650   ethernet_arp_interface_t *eai;
1651
1652   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1653
1654   e = arp_entry_find (eai, &args->a.ip4);
1655
1656   if (NULL != e)
1657     {
1658       adj_nbr_walk_nh4 (e->sw_if_index,
1659                         &e->ip4_address, arp_mk_incomplete_walk, e);
1660
1661       /*
1662        * The difference between flush and unset, is that an unset
1663        * means delete for static and dynamic entries. A flush
1664        * means delete only for dynamic. Flushing is what the DP
1665        * does in response to interface events. unset is only done
1666        * by the control plane.
1667        */
1668       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
1669         {
1670           arp_entry_free (eai, e);
1671         }
1672     }
1673   return (0);
1674 }
1675
1676 static int
1677 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
1678                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
1679                                               * args)
1680 {
1681   ethernet_arp_main_t *am = &ethernet_arp_main;
1682   ethernet_arp_ip4_entry_t *e;
1683   ethernet_arp_interface_t *eai;
1684
1685   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1686
1687   e = arp_entry_find (eai, &args->a.ip4);
1688
1689   if (NULL != e)
1690     {
1691       adj_nbr_walk_nh4 (e->sw_if_index,
1692                         &e->ip4_address, arp_mk_complete_walk, e);
1693     }
1694   return (0);
1695 }
1696
1697 static void
1698 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
1699                                     * a)
1700 {
1701   vnet_main_t *vm = vnet_get_main ();
1702   ASSERT (os_get_cpu_number () == 0);
1703
1704   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
1705     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
1706   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
1707     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
1708   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
1709     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
1710   else
1711     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
1712 }
1713
1714 /**
1715  * @brief Invoked when the interface's admin state changes
1716  */
1717 static clib_error_t *
1718 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
1719                                    u32 sw_if_index, u32 flags)
1720 {
1721   ethernet_arp_main_t *am = &ethernet_arp_main;
1722   ethernet_arp_ip4_entry_t *e;
1723   u32 i, *to_delete = 0;
1724
1725   /* *INDENT-OFF* */
1726   pool_foreach (e, am->ip4_entry_pool,
1727   ({
1728     if (e->sw_if_index == sw_if_index)
1729       vec_add1 (to_delete,
1730                 e - am->ip4_entry_pool);
1731   }));
1732   /* *INDENT-ON* */
1733
1734   for (i = 0; i < vec_len (to_delete); i++)
1735     {
1736       ethernet_arp_ip4_over_ethernet_address_t delme;
1737       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1738
1739       clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1740       delme.ip4.as_u32 = e->ip4_address.as_u32;
1741
1742       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1743         {
1744           vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1745         }
1746       else
1747         {
1748           vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1749         }
1750
1751     }
1752   vec_free (to_delete);
1753
1754   return 0;
1755 }
1756
1757 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
1758
1759 static void
1760 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
1761 {
1762   u8 old;
1763   int i;
1764
1765   for (i = 3; i >= 0; i--)
1766     {
1767       old = a->ip4.as_u8[i];
1768       a->ip4.as_u8[i] += 1;
1769       if (old < a->ip4.as_u8[i])
1770         break;
1771     }
1772
1773   for (i = 5; i >= 0; i--)
1774     {
1775       old = a->ethernet[i];
1776       a->ethernet[i] += 1;
1777       if (old < a->ethernet[i])
1778         break;
1779     }
1780 }
1781
1782 int
1783 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
1784                                 u32 sw_if_index, void *a_arg, int is_static)
1785 {
1786   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1787   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1788
1789   args.sw_if_index = sw_if_index;
1790   args.is_static = is_static;
1791   args.flags = 0;
1792   clib_memcpy (&args.a, a, sizeof (*a));
1793
1794   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1795                                (u8 *) & args, sizeof (args));
1796   return 0;
1797 }
1798
1799 int
1800 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
1801                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
1802 {
1803   ethernet_arp_main_t *am = &ethernet_arp_main;
1804   ethernet_proxy_arp_t *pa;
1805   u32 found_at_index = ~0;
1806
1807   vec_foreach (pa, am->proxy_arps)
1808   {
1809     if (pa->lo_addr == lo_addr->as_u32
1810         && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index)
1811       {
1812         found_at_index = pa - am->proxy_arps;
1813         break;
1814       }
1815   }
1816
1817   if (found_at_index != ~0)
1818     {
1819       /* Delete, otherwise it's already in the table */
1820       if (is_del)
1821         vec_delete (am->proxy_arps, 1, found_at_index);
1822       return 0;
1823     }
1824   /* delete, no such entry */
1825   if (is_del)
1826     return VNET_API_ERROR_NO_SUCH_ENTRY;
1827
1828   /* add, not in table */
1829   vec_add2 (am->proxy_arps, pa, 1);
1830   pa->lo_addr = lo_addr->as_u32;
1831   pa->hi_addr = hi_addr->as_u32;
1832   pa->fib_index = fib_index;
1833   return 0;
1834 }
1835
1836 /*
1837  * Remove any proxy arp entries asdociated with the
1838  * specificed fib.
1839  */
1840 int
1841 vnet_proxy_arp_fib_reset (u32 fib_id)
1842 {
1843   ip4_main_t *im = &ip4_main;
1844   ethernet_arp_main_t *am = &ethernet_arp_main;
1845   ethernet_proxy_arp_t *pa;
1846   u32 *entries_to_delete = 0;
1847   u32 fib_index;
1848   uword *p;
1849   int i;
1850
1851   p = hash_get (im->fib_index_by_table_id, fib_id);
1852   if (!p)
1853     return VNET_API_ERROR_NO_SUCH_ENTRY;
1854   fib_index = p[0];
1855
1856   vec_foreach (pa, am->proxy_arps)
1857   {
1858     if (pa->fib_index == fib_index)
1859       {
1860         vec_add1 (entries_to_delete, pa - am->proxy_arps);
1861       }
1862   }
1863
1864   for (i = 0; i < vec_len (entries_to_delete); i++)
1865     {
1866       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1867     }
1868
1869   vec_free (entries_to_delete);
1870
1871   return 0;
1872 }
1873
1874 static clib_error_t *
1875 ip_arp_add_del_command_fn (vlib_main_t * vm,
1876                            unformat_input_t * input, vlib_cli_command_t * cmd)
1877 {
1878   vnet_main_t *vnm = vnet_get_main ();
1879   u32 sw_if_index;
1880   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1881   int addr_valid = 0;
1882   int is_del = 0;
1883   int count = 1;
1884   u32 fib_index = 0;
1885   u32 fib_id;
1886   int is_static = 0;
1887   int is_proxy = 0;
1888
1889   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1890     {
1891       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1892       if (unformat (input, "%U %U %U",
1893                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1894                     unformat_ip4_address, &addr.ip4,
1895                     unformat_ethernet_address, &addr.ethernet))
1896         addr_valid = 1;
1897
1898       else if (unformat (input, "delete") || unformat (input, "del"))
1899         is_del = 1;
1900
1901       else if (unformat (input, "static"))
1902         is_static = 1;
1903
1904       else if (unformat (input, "count %d", &count))
1905         ;
1906
1907       else if (unformat (input, "fib-id %d", &fib_id))
1908         {
1909           ip4_main_t *im = &ip4_main;
1910           uword *p = hash_get (im->fib_index_by_table_id, fib_id);
1911           if (!p)
1912             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
1913           fib_index = p[0];
1914         }
1915
1916       else if (unformat (input, "proxy %U - %U",
1917                          unformat_ip4_address, &lo_addr.ip4,
1918                          unformat_ip4_address, &hi_addr.ip4))
1919         is_proxy = 1;
1920       else
1921         break;
1922     }
1923
1924   if (is_proxy)
1925     {
1926       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
1927                                      fib_index, is_del);
1928       return 0;
1929     }
1930
1931   if (addr_valid)
1932     {
1933       int i;
1934
1935       for (i = 0; i < count; i++)
1936         {
1937           if (is_del == 0)
1938             {
1939               uword event_type, *event_data = 0;
1940
1941               /* Park the debug CLI until the arp entry is installed */
1942               vnet_register_ip4_arp_resolution_event
1943                 (vnm, &addr.ip4, vlib_current_process (vm),
1944                  1 /* type */ , 0 /* data */ );
1945
1946               vnet_arp_set_ip4_over_ethernet
1947                 (vnm, sw_if_index, &addr, is_static);
1948
1949               vlib_process_wait_for_event (vm);
1950               event_type = vlib_process_get_events (vm, &event_data);
1951               vec_reset_length (event_data);
1952               if (event_type != 1)
1953                 clib_warning ("event type %d unexpected", event_type);
1954             }
1955           else
1956             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
1957
1958           increment_ip4_and_mac_address (&addr);
1959         }
1960     }
1961   else
1962     {
1963       return clib_error_return (0, "unknown input `%U'",
1964                                 format_unformat_error, input);
1965     }
1966
1967   return 0;
1968 }
1969
1970 /* *INDENT-OFF* */
1971 /*?
1972  * Add or delete IPv4 ARP cache entries.
1973  *
1974  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
1975  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
1976  * any order and combination.
1977  *
1978  * @cliexpar
1979  * @parblock
1980  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
1981  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
1982  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1983  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
1984  *
1985  * To add or delete an IPv4 ARP cache entry to or from a specific fib
1986  * table:
1987  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1988  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1989  *
1990  * Add or delete IPv4 static ARP cache entries as follows:
1991  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1992  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1993  *
1994  * For testing / debugging purposes, the 'set ip arp' command can add or
1995  * delete multiple entries. Supply the 'count N' parameter:
1996  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1997  * @endparblock
1998  ?*/
1999 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
2000   .path = "set ip arp",
2001   .short_help =
2002   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
2003   .function = ip_arp_add_del_command_fn,
2004 };
2005 /* *INDENT-ON* */
2006
2007 static clib_error_t *
2008 set_int_proxy_arp_command_fn (vlib_main_t * vm,
2009                               unformat_input_t *
2010                               input, vlib_cli_command_t * cmd)
2011 {
2012   vnet_main_t *vnm = vnet_get_main ();
2013   u32 sw_if_index;
2014   vnet_sw_interface_t *si;
2015   int enable = 0;
2016   int intfc_set = 0;
2017
2018   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2019     {
2020       if (unformat (input, "%U", unformat_vnet_sw_interface,
2021                     vnm, &sw_if_index))
2022         intfc_set = 1;
2023       else if (unformat (input, "enable") || unformat (input, "on"))
2024         enable = 1;
2025       else if (unformat (input, "disable") || unformat (input, "off"))
2026         enable = 0;
2027       else
2028         break;
2029     }
2030
2031   if (intfc_set == 0)
2032     return clib_error_return (0, "unknown input '%U'",
2033                               format_unformat_error, input);
2034
2035   si = vnet_get_sw_interface (vnm, sw_if_index);
2036   ASSERT (si);
2037   if (enable)
2038     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2039   else
2040     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2041
2042   return 0;
2043 }
2044
2045 /* *INDENT-OFF* */
2046 /*?
2047  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2048  * requests for the indicated address range. Multiple proxy-arp
2049  * ranges may be provisioned.
2050  *
2051  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2052  * Also, the underlying implementation has not been performance-tuned.
2053  * Avoid creating an unnecessarily large set of ranges.
2054  *
2055  * @cliexpar
2056  * To enable proxy arp on a range of addresses, use:
2057  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2058  * Append 'del' to delete a range of proxy ARP addresses:
2059  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2060  * You must then specifically enable proxy arp on individual interfaces:
2061  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2062  * To disable proxy arp on an individual interface:
2063  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2064  ?*/
2065 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2066   .path = "set interface proxy-arp",
2067   .short_help =
2068   "set interface proxy-arp <intfc> [enable|disable]",
2069   .function = set_int_proxy_arp_command_fn,
2070 };
2071 /* *INDENT-ON* */
2072
2073
2074 /*
2075  * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
2076  * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
2077  */
2078 typedef enum
2079 {
2080   ARP_TERM_NEXT_L2_OUTPUT,
2081   ARP_TERM_NEXT_DROP,
2082   ARP_TERM_N_NEXT,
2083 } arp_term_next_t;
2084
2085 u32 arp_term_next_node_index[32];
2086
2087 static uword
2088 arp_term_l2bd (vlib_main_t * vm,
2089                vlib_node_runtime_t * node, vlib_frame_t * frame)
2090 {
2091   l2input_main_t *l2im = &l2input_main;
2092   u32 n_left_from, next_index, *from, *to_next;
2093   u32 n_replies_sent = 0;
2094   u16 last_bd_index = ~0;
2095   l2_bridge_domain_t *last_bd_config = 0;
2096   l2_input_config_t *cfg0;
2097
2098   from = vlib_frame_vector_args (frame);
2099   n_left_from = frame->n_vectors;
2100   next_index = node->cached_next_index;
2101
2102   while (n_left_from > 0)
2103     {
2104       u32 n_left_to_next;
2105
2106       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2107
2108       while (n_left_from > 0 && n_left_to_next > 0)
2109         {
2110           vlib_buffer_t *p0;
2111           ethernet_header_t *eth0;
2112           ethernet_arp_header_t *arp0;
2113           ip6_header_t *iph0;
2114           u8 *l3h0;
2115           u32 pi0, error0, next0, sw_if_index0;
2116           u16 ethertype0;
2117           u16 bd_index0;
2118           u32 ip0;
2119           u8 *macp0;
2120
2121           pi0 = from[0];
2122           to_next[0] = pi0;
2123           from += 1;
2124           to_next += 1;
2125           n_left_from -= 1;
2126           n_left_to_next -= 1;
2127
2128           p0 = vlib_get_buffer (vm, pi0);
2129           eth0 = vlib_buffer_get_current (p0);
2130           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2131           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2132           arp0 = (ethernet_arp_header_t *) l3h0;
2133
2134           if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
2135                              (arp0->opcode !=
2136                               clib_host_to_net_u16
2137                               (ETHERNET_ARP_OPCODE_request))))
2138             goto check_ip6_nd;
2139
2140           /* Must be ARP request packet here */
2141           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2142                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2143             {
2144               u8 *t0 = vlib_add_trace (vm, node, p0,
2145                                        sizeof (ethernet_arp_input_trace_t));
2146               clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
2147             }
2148
2149           error0 = ETHERNET_ARP_ERROR_replies_sent;
2150           error0 =
2151             (arp0->l2_type !=
2152              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2153              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2154           error0 =
2155             (arp0->l3_type !=
2156              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2157              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2158
2159           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2160
2161           if (error0)
2162             goto drop;
2163
2164           /* Trash ARP packets whose ARP-level source addresses do not
2165              match their L2-frame-level source addresses  */
2166           if (PREDICT_FALSE
2167               (memcmp
2168                (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
2169                 sizeof (eth0->src_address))))
2170             {
2171               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2172               goto drop;
2173             }
2174
2175           /* Check if anyone want ARP request events for L2 BDs */
2176           {
2177             pending_resolution_t *mc;
2178             ethernet_arp_main_t *am = &ethernet_arp_main;
2179             uword *p = hash_get (am->mac_changes_by_address, 0);
2180             if (p && (vnet_buffer (p0)->l2.shg == 0))
2181               {                 // Only SHG 0 interface which is more likely local
2182                 u32 next_index = p[0];
2183                 while (next_index != (u32) ~ 0)
2184                   {
2185                     int (*fp) (u32, u8 *, u32, u32);
2186                     int rv = 1;
2187                     mc = pool_elt_at_index (am->mac_changes, next_index);
2188                     fp = mc->data_callback;
2189                     /* Call the callback, return 1 to suppress dup events */
2190                     if (fp)
2191                       rv = (*fp) (mc->data,
2192                                   arp0->ip4_over_ethernet[0].ethernet,
2193                                   sw_if_index0,
2194                                   arp0->ip4_over_ethernet[0].ip4.as_u32);
2195                     /* Signal the resolver process */
2196                     if (rv == 0)
2197                       vlib_process_signal_event (vm, mc->node_index,
2198                                                  mc->type_opaque, mc->data);
2199                     next_index = mc->next_index;
2200                   }
2201               }
2202           }
2203
2204           /* lookup BD mac_by_ip4 hash table for MAC entry */
2205           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2206           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2207           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2208                              || (last_bd_index == (u16) ~ 0)))
2209             {
2210               last_bd_index = bd_index0;
2211               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2212             }
2213           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2214
2215           if (PREDICT_FALSE (!macp0))
2216             goto next_l2_feature;       /* MAC not found */
2217
2218           /* MAC found, send ARP reply -
2219              Convert ARP request packet to ARP reply */
2220           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2221           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2222           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2223           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
2224           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
2225           clib_memcpy (eth0->src_address, macp0, 6);
2226           n_replies_sent += 1;
2227
2228         output_response:
2229           /* For BVI, need to use l2-fwd node to send ARP reply as
2230              l2-output node cannot output packet to BVI properly */
2231           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2232           if (PREDICT_FALSE (cfg0->bvi))
2233             {
2234               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2235               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2236               goto next_l2_feature;
2237             }
2238
2239           /* Send ARP/ND reply back out input interface through l2-output */
2240           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2241           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2242           /* Note that output to VXLAN tunnel will fail due to SHG which
2243              is probably desireable since ARP termination is not intended
2244              for ARP requests from other hosts. If output to VXLAN tunnel is
2245              required, however, can just clear the SHG in packet as follows:
2246              vnet_buffer(p0)->l2.shg = 0;         */
2247           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2248                                            to_next, n_left_to_next, pi0,
2249                                            next0);
2250           continue;
2251
2252         check_ip6_nd:
2253           /* IP6 ND event notification or solicitation handling to generate
2254              local response instead of flooding */
2255           iph0 = (ip6_header_t *) l3h0;
2256           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2257                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2258                              !ip6_address_is_unspecified
2259                              (&iph0->src_address)))
2260             {
2261               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2262               if (vnet_ip6_nd_term
2263                   (vm, node, p0, eth0, iph0, sw_if_index0,
2264                    vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
2265                 goto output_response;
2266             }
2267
2268         next_l2_feature:
2269           {
2270             u32 feature_bitmap0 =
2271               vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
2272             vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
2273             next0 =
2274               feat_bitmap_get_next_node_index (arp_term_next_node_index,
2275                                                feature_bitmap0);
2276             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2277                                              to_next, n_left_to_next,
2278                                              pi0, next0);
2279             continue;
2280           }
2281
2282         drop:
2283           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2284               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2285                arp0->ip4_over_ethernet[1].ip4.as_u32))
2286             {
2287               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2288             }
2289           next0 = ARP_TERM_NEXT_DROP;
2290           p0->error = node->errors[error0];
2291
2292           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2293                                            to_next, n_left_to_next, pi0,
2294                                            next0);
2295         }
2296
2297       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2298     }
2299
2300   vlib_error_count (vm, node->node_index,
2301                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2302   return frame->n_vectors;
2303 }
2304
2305 /* *INDENT-OFF* */
2306 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2307   .function = arp_term_l2bd,
2308   .name = "arp-term-l2bd",
2309   .vector_size = sizeof (u32),
2310   .n_errors = ETHERNET_ARP_N_ERROR,
2311   .error_strings = ethernet_arp_error_strings,
2312   .n_next_nodes = ARP_TERM_N_NEXT,
2313   .next_nodes = {
2314     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2315     [ARP_TERM_NEXT_DROP] = "error-drop",
2316   },
2317   .format_buffer = format_ethernet_arp_header,
2318   .format_trace = format_arp_term_input_trace,
2319 };
2320 /* *INDENT-ON* */
2321
2322 clib_error_t *
2323 arp_term_init (vlib_main_t * vm)
2324 {
2325   // Initialize the feature next-node indexes
2326   feat_bitmap_init_next_nodes (vm,
2327                                arp_term_l2bd_node.index,
2328                                L2INPUT_N_FEAT,
2329                                l2input_get_feat_names (),
2330                                arp_term_next_node_index);
2331   return 0;
2332 }
2333
2334 VLIB_INIT_FUNCTION (arp_term_init);
2335
2336 void
2337 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2338 {
2339   if (e->sw_if_index == sw_if_index)
2340     {
2341       adj_nbr_walk_nh4 (e->sw_if_index,
2342                         &e->ip4_address, arp_mk_complete_walk, e);
2343     }
2344 }
2345
2346 void
2347 ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index)
2348 {
2349   ethernet_arp_main_t *am = &ethernet_arp_main;
2350   ethernet_arp_ip4_entry_t *e;
2351
2352   /* *INDENT-OFF* */
2353   pool_foreach (e, am->ip4_entry_pool,
2354   ({
2355     change_arp_mac (sw_if_index, e);
2356   }));
2357   /* *INDENT-ON* */
2358 }
2359
2360 /*
2361  * fd.io coding-style-patch-verification: ON
2362  *
2363  * Local Variables:
2364  * eval: (c-set-style "gnu")
2365  * End:
2366  */