api: missing support for dumping of neighbours (VPP-333)
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip6.h>
20 #include <vnet/ethernet/ethernet.h>
21 #include <vnet/ethernet/arp_packet.h>
22 #include <vnet/l2/l2_input.h>
23 #include <vppinfra/mhash.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/adj/adj_nbr.h>
26 #include <vnet/mpls/mpls.h>
27
28 /**
29  * @file
30  * @brief IPv4 ARP.
31  *
32  * This file contains code to manage the IPv4 ARP tables (IP Address
33  * to MAC Address lookup).
34  */
35
36
37 void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
38
39 /**
40  * @brief Per-interface ARP configuration and state
41  */
42 typedef struct ethernet_arp_interface_t_
43 {
44   /**
45    * Hash table of ARP entries.
46    * Since this hash table is per-interface, the key is only the IPv4 address.
47    */
48   uword *arp_entries;
49 } ethernet_arp_interface_t;
50
51 typedef struct
52 {
53   u32 lo_addr;
54   u32 hi_addr;
55   u32 fib_index;
56 } ethernet_proxy_arp_t;
57
58 typedef struct
59 {
60   u32 next_index;
61   uword node_index;
62   uword type_opaque;
63   uword data;
64   /* Used for arp event notification only */
65   void *data_callback;
66   u32 pid;
67 } pending_resolution_t;
68
69 typedef struct
70 {
71   /* Hash tables mapping name to opcode. */
72   uword *opcode_by_name;
73
74   /* lite beer "glean" adjacency handling */
75   uword *pending_resolutions_by_address;
76   pending_resolution_t *pending_resolutions;
77
78   /* Mac address change notification */
79   uword *mac_changes_by_address;
80   pending_resolution_t *mac_changes;
81
82   ethernet_arp_ip4_entry_t *ip4_entry_pool;
83
84   /* ARP attack mitigation */
85   u32 arp_delete_rotor;
86   u32 limit_arp_cache_size;
87
88   /** Per interface state */
89   ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
90
91   /* Proxy arp vector */
92   ethernet_proxy_arp_t *proxy_arps;
93 } ethernet_arp_main_t;
94
95 static ethernet_arp_main_t ethernet_arp_main;
96
97 typedef struct
98 {
99   u32 sw_if_index;
100   ethernet_arp_ip4_over_ethernet_address_t a;
101   int is_static;
102   int flags;
103 #define ETHERNET_ARP_ARGS_REMOVE (1<<0)
104 #define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
105 #define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
106 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
107
108 static void
109 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
110                                     * a);
111
112 static u8 *
113 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
114 {
115   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
116   char *t = 0;
117   switch (h)
118     {
119 #define _(n,f) case n: t = #f; break;
120       foreach_ethernet_arp_hardware_type;
121 #undef _
122
123     default:
124       return format (s, "unknown 0x%x", h);
125     }
126
127   return format (s, "%s", t);
128 }
129
130 static u8 *
131 format_ethernet_arp_opcode (u8 * s, va_list * va)
132 {
133   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
134   char *t = 0;
135   switch (o)
136     {
137 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
138       foreach_ethernet_arp_opcode;
139 #undef _
140
141     default:
142       return format (s, "unknown 0x%x", o);
143     }
144
145   return format (s, "%s", t);
146 }
147
148 static uword
149 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
150                                               va_list * args)
151 {
152   int *result = va_arg (*args, int *);
153   ethernet_arp_main_t *am = &ethernet_arp_main;
154   int x, i;
155
156   /* Numeric opcode. */
157   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
158     {
159       if (x >= (1 << 16))
160         return 0;
161       *result = x;
162       return 1;
163     }
164
165   /* Named type. */
166   if (unformat_user (input, unformat_vlib_number_by_name,
167                      am->opcode_by_name, &i))
168     {
169       *result = i;
170       return 1;
171     }
172
173   return 0;
174 }
175
176 static uword
177 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
178                                              va_list * args)
179 {
180   int *result = va_arg (*args, int *);
181   if (!unformat_user
182       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
183     return 0;
184
185   *result = clib_host_to_net_u16 ((u16) * result);
186   return 1;
187 }
188
189 static u8 *
190 format_ethernet_arp_header (u8 * s, va_list * va)
191 {
192   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
193   u32 max_header_bytes = va_arg (*va, u32);
194   uword indent;
195   u16 l2_type, l3_type;
196
197   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
198     return format (s, "ARP header truncated");
199
200   l2_type = clib_net_to_host_u16 (a->l2_type);
201   l3_type = clib_net_to_host_u16 (a->l3_type);
202
203   indent = format_get_indent (s);
204
205   s = format (s, "%U, type %U/%U, address size %d/%d",
206               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
207               format_ethernet_arp_hardware_type, l2_type,
208               format_ethernet_type, l3_type,
209               a->n_l2_address_bytes, a->n_l3_address_bytes);
210
211   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
212       && l3_type == ETHERNET_TYPE_IP4)
213     {
214       s = format (s, "\n%U%U/%U -> %U/%U",
215                   format_white_space, indent,
216                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
217                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
218                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
219                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
220     }
221   else
222     {
223       uword n2 = a->n_l2_address_bytes;
224       uword n3 = a->n_l3_address_bytes;
225       s = format (s, "\n%U%U/%U -> %U/%U",
226                   format_white_space, indent,
227                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
228                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
229                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
230                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
231     }
232
233   return s;
234 }
235
236 u8 *
237 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
238 {
239   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
240   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
241   vnet_sw_interface_t *si;
242   u8 *flags = 0;
243
244   if (!e)
245     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
246                    "Flags", "Ethernet", "Interface");
247
248   si = vnet_get_sw_interface (vnm, e->sw_if_index);
249
250   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
251     flags = format (flags, "S");
252
253   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
254     flags = format (flags, "D");
255
256   s = format (s, "%=12U%=16U%=6s%=20U%=24U",
257               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
258               format_ip4_address, &e->ip4_address,
259               flags ? (char *) flags : "",
260               format_ethernet_address, e->ethernet_address,
261               format_vnet_sw_interface_name, vnm, si);
262
263   vec_free (flags);
264   return s;
265 }
266
267 typedef struct
268 {
269   u8 packet_data[64];
270 } ethernet_arp_input_trace_t;
271
272 static u8 *
273 format_ethernet_arp_input_trace (u8 * s, va_list * va)
274 {
275   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
276   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
277   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
278
279   s = format (s, "%U",
280               format_ethernet_arp_header,
281               t->packet_data, sizeof (t->packet_data));
282
283   return s;
284 }
285
286 static u8 *
287 format_arp_term_input_trace (u8 * s, va_list * va)
288 {
289   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
290   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
291   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
292
293   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
294      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
295      - for ip6, the first nibble has value of 6. */
296   s = format (s, "%U", t->packet_data[0] == 0 ?
297               format_ethernet_arp_header : format_ip6_header,
298               t->packet_data, sizeof (t->packet_data));
299
300   return s;
301 }
302
303 static void
304 arp_nbr_probe (ip_adjacency_t * adj)
305 {
306   vnet_main_t *vnm = vnet_get_main ();
307   ip4_main_t *im = &ip4_main;
308   ip_interface_address_t *ia;
309   ethernet_arp_header_t *h;
310   vnet_hw_interface_t *hi;
311   vnet_sw_interface_t *si;
312   ip4_address_t *src;
313   vlib_buffer_t *b;
314   vlib_main_t *vm;
315   u32 bi = 0;
316
317   vm = vlib_get_main ();
318
319   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
320
321   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
322     {
323       return;
324     }
325
326   src =
327     ip4_interface_address_matching_destination (im,
328                                                 &adj->sub_type.nbr.next_hop.
329                                                 ip4,
330                                                 adj->rewrite_header.
331                                                 sw_if_index, &ia);
332   if (!src)
333     {
334       return;
335     }
336
337   h =
338     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
339                                      &bi);
340
341   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
342
343   clib_memcpy (h->ip4_over_ethernet[0].ethernet,
344                hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
345
346   h->ip4_over_ethernet[0].ip4 = src[0];
347   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
348
349   b = vlib_get_buffer (vm, bi);
350   vnet_buffer (b)->sw_if_index[VLIB_RX] =
351     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
352
353   /* Add encapsulation string for software interface (e.g. ethernet header). */
354   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
355   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
356
357   {
358     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
359     u32 *to_next = vlib_frame_vector_args (f);
360     to_next[0] = bi;
361     f->n_vectors = 1;
362     vlib_put_frame_to_node (vm, hi->output_node_index, f);
363   }
364 }
365
366 static void
367 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
368 {
369   adj_nbr_update_rewrite
370     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
371      ethernet_build_rewrite (vnet_get_main (),
372                              e->sw_if_index,
373                              adj_get_link_type (ai), e->ethernet_address));
374 }
375
376 static void
377 arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
378 {
379   adj_nbr_update_rewrite
380     (ai,
381      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
382      ethernet_build_rewrite (vnet_get_main (),
383                              e->sw_if_index,
384                              VNET_LINK_ARP,
385                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
386 }
387
388 static ethernet_arp_ip4_entry_t *
389 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
390 {
391   ethernet_arp_main_t *am = &ethernet_arp_main;
392   ethernet_arp_ip4_entry_t *e = NULL;
393   uword *p;
394
395   if (NULL != eai->arp_entries)
396     {
397       p = hash_get (eai->arp_entries, addr->as_u32);
398       if (!p)
399         return (NULL);
400
401       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
402     }
403
404   return (e);
405 }
406
407 static adj_walk_rc_t
408 arp_mk_complete_walk (adj_index_t ai, void *ctx)
409 {
410   ethernet_arp_ip4_entry_t *e = ctx;
411
412   arp_mk_complete (ai, e);
413
414   return (ADJ_WALK_RC_CONTINUE);
415 }
416
417 static adj_walk_rc_t
418 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
419 {
420   ethernet_arp_ip4_entry_t *e = ctx;
421
422   arp_mk_incomplete (ai, e);
423
424   return (ADJ_WALK_RC_CONTINUE);
425 }
426
427 void
428 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
429 {
430   ethernet_arp_main_t *am = &ethernet_arp_main;
431   ethernet_arp_interface_t *arp_int;
432   ethernet_arp_ip4_entry_t *e;
433   ip_adjacency_t *adj;
434
435   adj = adj_get (ai);
436
437   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
438   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
439   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
440
441   if (NULL != e)
442     {
443       adj_nbr_walk_nh4 (sw_if_index,
444                         &e->ip4_address, arp_mk_complete_walk, e);
445     }
446   else
447     {
448       /*
449        * no matching ARP entry.
450        * construct the rewire required to for an ARP packet, and stick
451        * that in the adj's pipe to smoke.
452        */
453       adj_nbr_update_rewrite (ai,
454                               ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
455                               ethernet_build_rewrite (vnm,
456                                                       sw_if_index,
457                                                       VNET_LINK_ARP,
458                                                       VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
459
460       /*
461        * since the FIB has added this adj for a route, it makes sense it may
462        * want to forward traffic sometime soon. Let's send a speculative ARP.
463        * just one. If we were to do periodically that wouldn't be bad either,
464        * but that's more code than i'm prepared to write at this time for
465        * relatively little reward.
466        */
467       arp_nbr_probe (adj);
468     }
469 }
470
471 int
472 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
473                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
474                                          * args)
475 {
476   ethernet_arp_ip4_entry_t *e = 0;
477   ethernet_arp_main_t *am = &ethernet_arp_main;
478   ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
479   vlib_main_t *vm = vlib_get_main ();
480   int make_new_arp_cache_entry = 1;
481   uword *p;
482   pending_resolution_t *pr, *mc;
483   ethernet_arp_interface_t *arp_int;
484   int is_static = args->is_static;
485   u32 sw_if_index = args->sw_if_index;
486
487   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
488
489   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
490
491   if (NULL != arp_int->arp_entries)
492     {
493       p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
494       if (p)
495         {
496           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
497
498           /* Refuse to over-write static arp. */
499           if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
500             return -2;
501           make_new_arp_cache_entry = 0;
502         }
503     }
504
505   if (make_new_arp_cache_entry)
506     {
507       fib_prefix_t pfx = {
508         .fp_len = 32,
509         .fp_proto = FIB_PROTOCOL_IP4,
510         .fp_addr = {
511                     .ip4 = a->ip4,
512                     }
513         ,
514       };
515       u32 fib_index;
516
517       pool_get (am->ip4_entry_pool, e);
518
519       if (NULL == arp_int->arp_entries)
520         {
521           arp_int->arp_entries = hash_create (0, sizeof (u32));
522         }
523
524       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
525
526       e->sw_if_index = sw_if_index;
527       e->ip4_address = a->ip4;
528       clib_memcpy (e->ethernet_address,
529                    a->ethernet, sizeof (e->ethernet_address));
530
531       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
532       e->fib_entry_index =
533         fib_table_entry_update_one_path (fib_index,
534                                          &pfx,
535                                          FIB_SOURCE_ADJ,
536                                          FIB_ENTRY_FLAG_ATTACHED,
537                                          FIB_PROTOCOL_IP4,
538                                          &pfx.fp_addr,
539                                          e->sw_if_index,
540                                          ~0,
541                                          1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
542     }
543   else
544     {
545       /*
546        * prevent a DoS attack from the data-plane that
547        * spams us with no-op updates to the MAC address
548        */
549       if (0 == memcmp (e->ethernet_address,
550                        a->ethernet, sizeof (e->ethernet_address)))
551         return -1;
552
553       /* Update time stamp and ethernet address. */
554       clib_memcpy (e->ethernet_address, a->ethernet,
555                    sizeof (e->ethernet_address));
556     }
557
558   e->cpu_time_last_updated = clib_cpu_time_now ();
559   if (is_static)
560     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
561   else
562     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
563
564   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
565
566   /* Customer(s) waiting for this address to be resolved? */
567   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
568   if (p)
569     {
570       u32 next_index;
571       next_index = p[0];
572
573       while (next_index != (u32) ~ 0)
574         {
575           pr = pool_elt_at_index (am->pending_resolutions, next_index);
576           vlib_process_signal_event (vm, pr->node_index,
577                                      pr->type_opaque, pr->data);
578           next_index = pr->next_index;
579           pool_put (am->pending_resolutions, pr);
580         }
581
582       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
583     }
584
585   /* Customer(s) requesting ARP event for this address? */
586   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
587   if (p)
588     {
589       u32 next_index;
590       next_index = p[0];
591
592       while (next_index != (u32) ~ 0)
593         {
594           int (*fp) (u32, u8 *, u32, u32);
595           int rv = 1;
596           mc = pool_elt_at_index (am->mac_changes, next_index);
597           fp = mc->data_callback;
598
599           /* Call the user's data callback, return 1 to suppress dup events */
600           if (fp)
601             rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
602
603           /*
604            * Signal the resolver process, as long as the user
605            * says they want to be notified
606            */
607           if (rv == 0)
608             vlib_process_signal_event (vm, mc->node_index,
609                                        mc->type_opaque, mc->data);
610           next_index = mc->next_index;
611         }
612     }
613
614   return 0;
615 }
616
617 void
618 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
619                                         void *address_arg,
620                                         uword node_index,
621                                         uword type_opaque, uword data)
622 {
623   ethernet_arp_main_t *am = &ethernet_arp_main;
624   ip4_address_t *address = address_arg;
625   uword *p;
626   pending_resolution_t *pr;
627
628   pool_get (am->pending_resolutions, pr);
629
630   pr->next_index = ~0;
631   pr->node_index = node_index;
632   pr->type_opaque = type_opaque;
633   pr->data = data;
634   pr->data_callback = 0;
635
636   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
637   if (p)
638     {
639       /* Insert new resolution at the head of the list */
640       pr->next_index = p[0];
641       hash_unset (am->pending_resolutions_by_address, address->as_u32);
642     }
643
644   hash_set (am->pending_resolutions_by_address, address->as_u32,
645             pr - am->pending_resolutions);
646 }
647
648 int
649 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
650                                    void *data_callback,
651                                    u32 pid,
652                                    void *address_arg,
653                                    uword node_index,
654                                    uword type_opaque, uword data, int is_add)
655 {
656   ethernet_arp_main_t *am = &ethernet_arp_main;
657   ip4_address_t *address = address_arg;
658   uword *p;
659   pending_resolution_t *mc;
660   void (*fp) (u32, u8 *) = data_callback;
661
662   if (is_add)
663     {
664       pool_get (am->mac_changes, mc);
665
666       mc->next_index = ~0;
667       mc->node_index = node_index;
668       mc->type_opaque = type_opaque;
669       mc->data = data;
670       mc->data_callback = data_callback;
671       mc->pid = pid;
672
673       p = hash_get (am->mac_changes_by_address, address->as_u32);
674       if (p)
675         {
676           /* Insert new resolution at the head of the list */
677           mc->next_index = p[0];
678           hash_unset (am->mac_changes_by_address, address->as_u32);
679         }
680
681       hash_set (am->mac_changes_by_address, address->as_u32,
682                 mc - am->mac_changes);
683       return 0;
684     }
685   else
686     {
687       u32 index;
688       pending_resolution_t *mc_last = 0;
689
690       p = hash_get (am->mac_changes_by_address, address->as_u32);
691       if (p == 0)
692         return VNET_API_ERROR_NO_SUCH_ENTRY;
693
694       index = p[0];
695
696       while (index != (u32) ~ 0)
697         {
698           mc = pool_elt_at_index (am->mac_changes, index);
699           if (mc->node_index == node_index &&
700               mc->type_opaque == type_opaque && mc->pid == pid)
701             {
702               /* Clients may need to clean up pool entries, too */
703               if (fp)
704                 (*fp) (mc->data, 0 /* no new mac addrs */ );
705               if (index == p[0])
706                 {
707                   hash_unset (am->mac_changes_by_address, address->as_u32);
708                   if (mc->next_index != ~0)
709                     hash_set (am->mac_changes_by_address, address->as_u32,
710                               mc->next_index);
711                   pool_put (am->mac_changes, mc);
712                   return 0;
713                 }
714               else
715                 {
716                   ASSERT (mc_last);
717                   mc_last->next_index = mc->next_index;
718                   pool_put (am->mac_changes, mc);
719                   return 0;
720                 }
721             }
722           mc_last = mc;
723           index = mc->next_index;
724         }
725
726       return VNET_API_ERROR_NO_SUCH_ENTRY;
727     }
728 }
729
/**
 * @brief Next-node choices for ARP input: a packet is either dropped or
 * turned into a reply that is sent back to the sender.
 */
typedef enum
{
  ARP_INPUT_NEXT_DROP,		/* discard the packet */
  ARP_INPUT_NEXT_REPLY_TX,	/* transmit a reply */
  ARP_INPUT_N_NEXT,		/* number of next nodes */
} arp_input_next_t;
737
/*
 * X-macro listing the ARP error codes as (symbol, description) pairs.
 * Define _(sym, string) before expanding it.
 */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address")        \
  _ (gratuitous_arp, "ARP probe or announcement dropped")               \
  _ (interface_no_table, "Interface is not mapped to an IP table")

/* One ETHERNET_ARP_ERROR_<sym> per list entry, plus the total count. */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
    ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;

761
762
763 static void
764 unset_random_arp_entry (void)
765 {
766   ethernet_arp_main_t *am = &ethernet_arp_main;
767   ethernet_arp_ip4_entry_t *e;
768   vnet_main_t *vnm = vnet_get_main ();
769   ethernet_arp_ip4_over_ethernet_address_t delme;
770   u32 index;
771
772   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
773   am->arp_delete_rotor = index;
774
775   /* Try again from elt 0, could happen if an intfc goes down */
776   if (index == ~0)
777     {
778       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
779       am->arp_delete_rotor = index;
780     }
781
782   /* Nothing left in the pool */
783   if (index == ~0)
784     return;
785
786   e = pool_elt_at_index (am->ip4_entry_pool, index);
787
788   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
789   delme.ip4.as_u32 = e->ip4_address.as_u32;
790
791   vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
792 }
793
794 static int
795 arp_unnumbered (vlib_buffer_t * p0,
796                 u32 pi0, ethernet_header_t * eth0, u32 sw_if_index)
797 {
798   vlib_main_t *vm = vlib_get_main ();
799   vnet_main_t *vnm = vnet_get_main ();
800   vnet_interface_main_t *vim = &vnm->interface_main;
801   vnet_sw_interface_t *si;
802   vnet_hw_interface_t *hi;
803   u32 unnum_src_sw_if_index;
804   u32 *broadcast_swifs = 0;
805   u32 *buffers = 0;
806   u32 n_alloc = 0;
807   vlib_buffer_t *b0;
808   int i;
809   u8 dst_mac_address[6];
810   i16 header_size;
811   ethernet_arp_header_t *arp0;
812
813   /* Save the dst mac address */
814   clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
815
816   /* Figure out which sw_if_index supplied the address */
817   unnum_src_sw_if_index = sw_if_index;
818
819   /* Track down all users of the unnumbered source */
820   /* *INDENT-OFF* */
821   pool_foreach (si, vim->sw_interfaces,
822   ({
823     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
824         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
825       {
826         vec_add1 (broadcast_swifs, si->sw_if_index);
827       }
828   }));
829   /* *INDENT-ON* */
830
831   /* If there are no interfaces un-unmbered to this interface,
832      we are done  here. */
833   if (0 == vec_len (broadcast_swifs))
834     return 0;
835
836   /* Allocate buffering if we need it */
837   if (vec_len (broadcast_swifs) > 1)
838     {
839       vec_validate (buffers, vec_len (broadcast_swifs) - 2);
840       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len (buffers));
841       _vec_len (buffers) = n_alloc;
842       for (i = 0; i < n_alloc; i++)
843         {
844           b0 = vlib_get_buffer (vm, buffers[i]);
845
846           /* xerox (partially built) ARP pkt */
847           clib_memcpy (b0->data, p0->data,
848                        p0->current_length + p0->current_data);
849           b0->current_data = p0->current_data;
850           b0->current_length = p0->current_length;
851           vnet_buffer (b0)->sw_if_index[VLIB_RX] =
852             vnet_buffer (p0)->sw_if_index[VLIB_RX];
853         }
854     }
855
856   vec_insert (buffers, 1, 0);
857   buffers[0] = pi0;
858
859   for (i = 0; i < vec_len (buffers); i++)
860     {
861       b0 = vlib_get_buffer (vm, buffers[i]);
862       arp0 = vlib_buffer_get_current (b0);
863
864       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
865       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
866
867       /* For decoration, most likely */
868       vnet_buffer (b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
869
870       /* Fix ARP pkt src address */
871       clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
872
873       /* Build L2 encaps for this swif */
874       header_size = sizeof (ethernet_header_t);
875       if (si->sub.eth.flags.one_tag)
876         header_size += 4;
877       else if (si->sub.eth.flags.two_tags)
878         header_size += 8;
879
880       vlib_buffer_advance (b0, -header_size);
881       eth0 = vlib_buffer_get_current (b0);
882
883       if (si->sub.eth.flags.one_tag)
884         {
885           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
886
887           eth0->type = si->sub.eth.flags.dot1ad ?
888             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
889             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
890           outer->priority_cfi_and_id =
891             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
892           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
893
894         }
895       else if (si->sub.eth.flags.two_tags)
896         {
897           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
898           ethernet_vlan_header_t *inner = (void *) (outer + 1);
899
900           eth0->type = si->sub.eth.flags.dot1ad ?
901             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
902             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
903           outer->priority_cfi_and_id =
904             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
905           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
906           inner->priority_cfi_and_id =
907             clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
908           inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
909
910         }
911       else
912         {
913           eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
914         }
915
916       /* Restore the original dst address, set src address */
917       clib_memcpy (eth0->dst_address, dst_mac_address,
918                    sizeof (eth0->dst_address));
919       clib_memcpy (eth0->src_address, hi->hw_address,
920                    sizeof (eth0->src_address));
921
922       /* Transmit replicas */
923       if (i > 0)
924         {
925           vlib_frame_t *f =
926             vlib_get_frame_to_node (vm, hi->output_node_index);
927           u32 *to_next = vlib_frame_vector_args (f);
928           to_next[0] = buffers[i];
929           f->n_vectors = 1;
930           vlib_put_frame_to_node (vm, hi->output_node_index, f);
931         }
932     }
933
934   /* The regular path outputs the original pkt.. */
935   vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
936
937   vec_free (broadcast_swifs);
938   vec_free (buffers);
939
940   return !0;
941 }
942
/**
 * @brief Graph node function for "arp-input": validate, learn from and
 * answer IPv4 ARP packets.
 *
 * For each buffer in the frame the node:
 *  - drops packets whose l2_type / l3_type are not ethernet / IPv4, or
 *    whose RX interface has no IPv4 FIB (fib index ~0);
 *  - looks up the target address (ip4_over_ethernet[1]) as a /32; when the
 *    FIB_SOURCE_INTERFACE flags lack CONNECTED it diverts to drop1, which
 *    filters gratuitous ARPs and then tries the proxy-ARP table;
 *  - requires the sender address (ip4_over_ethernet[0]) to be attached or
 *    connected, to resolve through the RX interface, and not to be LOCAL;
 *  - drops packets whose ethernet source differs from the ARP sender MAC;
 *  - learns the sender mapping for requests and for unicast frames;
 *  - for requests whose target carries the LOCAL flag, rewrites the packet
 *    in place into a reply and enqueues it to ARP_INPUT_NEXT_REPLY_TX.
 *
 * Every other packet is enqueued to ARP_INPUT_NEXT_DROP with p0->error
 * taken from error0.
 *
 * @param vm    vlib main
 * @param node  runtime of this node (error counters, cached next index)
 * @param frame frame holding the buffer indices to process
 * @return frame->n_vectors
 */
static uword
arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  ethernet_arp_main_t *am = &ethernet_arp_main;
  vnet_main_t *vnm = vnet_get_main ();
  ip4_main_t *im4 = &ip4_main;
  u32 n_left_from, next_index, *from, *to_next;
  u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1,
                                   sizeof (ethernet_arp_input_trace_t));

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          vnet_hw_interface_t *hw_if0;
          ethernet_arp_header_t *arp0;
          ethernet_header_t *eth0;
          ip_adjacency_t *adj0;
          ip4_address_t *if_addr0, proxy_src;
          u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
          u8 is_request0, dst_is_local0, is_unnum0;
          ethernet_proxy_arp_t *pa;
          fib_node_index_t dst_fei, src_fei;
          fib_prefix_t pfx0;
          fib_entry_flag_t src_flags, dst_flags;

          /* Speculatively enqueue the buffer to the current next frame. */
          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;
          /* pa stays NULL unless the proxy-ARP loop below finds a match. */
          pa = 0;

          p0 = vlib_get_buffer (vm, pi0);
          arp0 = vlib_buffer_get_current (p0);

          is_request0 = arp0->opcode
            == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);

          error0 = ETHERNET_ARP_ERROR_replies_sent;

          error0 =
            (arp0->l2_type !=
             clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
             ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
          error0 =
            (arp0->l3_type !=
             clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
             ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);

          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];

          /*
           * NOTE(review): this test only singles out the two type failures
           * above if ETHERNET_ARP_ERROR_replies_sent is 0 -- presumably it
           * is the first error code; confirm against the error enum.
           */
          if (error0)
            goto drop2;

          /* Check that IP address is local and matches incoming interface. */
          fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
          if (~0 == fib_index0)
            {
              error0 = ETHERNET_ARP_ERROR_interface_no_table;
              goto drop2;

            }
          dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
                                          &arp0->ip4_over_ethernet[1].ip4,
                                          32);
          dst_flags = fib_entry_get_flags_for_source (dst_fei,
                                                      FIB_SOURCE_INTERFACE);

          conn_sw_if_index0 =
            fib_entry_get_resolving_interface_for_source (dst_fei,
                                                          FIB_SOURCE_INTERFACE);

          /* Target is not a connected address: try gratuitous / proxy ARP. */
          if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
            {
              error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
              goto drop1;
            }

          /* Honor unnumbered interface, if any */
          is_unnum0 = sw_if_index0 != conn_sw_if_index0;

          /* Source must also be local to subnet of matching interface address. */
          src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
                                          &arp0->ip4_over_ethernet[0].ip4,
                                          32);
          src_flags = fib_entry_get_flags (src_fei);

          if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
                (FIB_ENTRY_FLAG_CONNECTED & src_flags)) ||
              sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
            {
              error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
              goto drop2;
            }

          /* Reject requests/replies with our local interface address. */
          if (FIB_ENTRY_FLAG_LOCAL & src_flags)
            {
              error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
              goto drop2;
            }

          dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
          /* The prefix of the target's FIB entry supplies the reply's sender IP. */
          fib_entry_get_prefix (dst_fei, &pfx0);
          if_addr0 = &pfx0.fp_addr.ip4;

          /* Fill in ethernet header. */
          eth0 = ethernet_buffer_get_header (p0);

          /* Trash ARP packets whose ARP-level source addresses do not
             match their L2-frame-level source addresses */
          if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
                      sizeof (eth0->src_address)))
            {
              error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
              goto drop2;
            }

          /* Learn or update sender's mapping only for requests or unicasts
             that don't match local interface address. */
          if (ethernet_address_cast (eth0->dst_address) ==
              ETHERNET_ADDRESS_UNICAST || is_request0)
            {
              /* A non-zero limit that has been reached evicts one entry first. */
              if (am->limit_arp_cache_size &&
                  pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
                unset_random_arp_entry ();

              vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0,
                                              &arp0->ip4_over_ethernet[0],
                                              0 /* is_static */ );
              error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
            }

          /* Only send a reply for requests sent which match a local interface. */
          if (!(is_request0 && dst_is_local0))
            {
              error0 =
                (arp0->opcode ==
                 clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) ?
                 ETHERNET_ARP_ERROR_replies_received : error0);
              goto drop1;
            }

          /* Send a reply. */
        send_reply:
          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
          hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);

          /* Send reply back through input interface */
          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
          next0 = ARP_INPUT_NEXT_REPLY_TX;

          arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);

          /* The original sender becomes the target of the reply. */
          arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];

          /* Sender of the reply: our hardware address and if_addr0. */
          clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
                       hw_if0->hw_address, 6);
          clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
            if_addr0->data_u32;

          /* Hardware must be ethernet-like. */
          ASSERT (vec_len (hw_if0->hw_address) == 6);

          clib_memcpy (eth0->dst_address, eth0->src_address, 6);
          clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);

          /* Figure out how much to rewind current data from adjacency. */
          /* get the adj from the destination's covering connected */
          /* Skipped on the proxy-ARP path, which arrives here with pa set
             and has already rewound the buffer. */
          if (NULL == pa)
            {
              adj0 =
                adj_get (fib_entry_get_adj_for_source
                         (ip4_fib_table_lookup
                          (ip4_fib_get (fib_index0),
                           &arp0->ip4_over_ethernet[1].ip4, 31),
                          FIB_SOURCE_INTERFACE));
              if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
                {
                  error0 = ETHERNET_ARP_ERROR_missing_interface_address;
                  goto drop2;
                }
              if (is_unnum0)
                {
                  /* On failure error0 keeps whatever value was set earlier. */
                  if (!arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0))
                    goto drop2;
                }
              else
                vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
            }
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);

          /* Counts proxy replies too; they are subtracted again when the
             counters are updated after the loop. */
          n_replies_sent += 1;
          continue;

        drop1:
          /* Zero sender IP, or sender IP equal to target IP. */
          if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
              (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
               arp0->ip4_over_ethernet[1].ip4.as_u32))
            {
              error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
              goto drop2;
            }
          /* See if proxy arp is configured for the address */
          if (is_request0)
            {
              vnet_sw_interface_t *si;
              u32 this_addr = clib_net_to_host_u32
                (arp0->ip4_over_ethernet[1].ip4.as_u32);
              /* NOTE(review): shadows the fib_index0 declared at the top of
                 the loop body. */
              u32 fib_index0;

              si = vnet_get_sw_interface (vnm, sw_if_index0);

              if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
                goto drop2;

              fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
                                    sw_if_index0);

              vec_foreach (pa, am->proxy_arps)
              {
                u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
                u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);

                /* an ARP request hit in the proxy-arp table? */
                if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
                    (fib_index0 == pa->fib_index))
                  {
                    eth0 = ethernet_buffer_get_header (p0);
                    /* Answer on behalf of the requested address. */
                    proxy_src.as_u32 =
                      arp0->ip4_over_ethernet[1].ip4.data_u32;

                    /*
                     * Rewind buffer, direct code above not to
                     * think too hard about it.
                     */
                    if_addr0 = &proxy_src;
                    is_unnum0 = 0;
                    i32 ethernet_start =
                      vnet_buffer (p0)->ethernet.start_of_ethernet_header;
                    i32 rewind = p0->current_data - ethernet_start;
                    vlib_buffer_advance (p0, -rewind);
                    n_proxy_arp_replies_sent++;
                    goto send_reply;
                  }
              }
            }

        drop2:

          next0 = ARP_INPUT_NEXT_DROP;
          p0->error = node->errors[error0];

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* n_replies_sent includes the proxy replies, which get their own counter. */
  vlib_error_count (vm, node->node_index,
                    ETHERNET_ARP_ERROR_replies_sent,
                    n_replies_sent - n_proxy_arp_replies_sent);

  vlib_error_count (vm, node->node_index,
                    ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
                    n_proxy_arp_replies_sent);
  return frame->n_vectors;
}
1228
1229 static char *ethernet_arp_error_strings[] = {
1230 #define _(sym,string) string,
1231   foreach_ethernet_arp_error
1232 #undef _
1233 };
1234
1235 /* *INDENT-OFF* */
1236 VLIB_REGISTER_NODE (arp_input_node, static) =
1237 {
1238   .function = arp_input,
1239   .name = "arp-input",
1240   .vector_size = sizeof (u32),
1241   .n_errors = ETHERNET_ARP_N_ERROR,
1242   .error_strings = ethernet_arp_error_strings,
1243   .n_next_nodes = ARP_INPUT_N_NEXT,
1244   .next_nodes = {
1245     [ARP_INPUT_NEXT_DROP] = "error-drop",
1246     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1247   },
1248   .format_buffer = format_ethernet_arp_header,
1249   .format_trace = format_ethernet_arp_input_trace,
1250 };
1251 /* *INDENT-ON* */
1252
1253 static int
1254 ip4_arp_entry_sort (void *a1, void *a2)
1255 {
1256   ethernet_arp_ip4_entry_t *e1 = a1;
1257   ethernet_arp_ip4_entry_t *e2 = a2;
1258
1259   int cmp;
1260   vnet_main_t *vnm = vnet_get_main ();
1261
1262   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1263   if (!cmp)
1264     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1265   return cmp;
1266 }
1267
1268 ethernet_arp_ip4_entry_t *
1269 ip4_neighbor_entries (u32 sw_if_index)
1270 {
1271   ethernet_arp_main_t *am = &ethernet_arp_main;
1272   ethernet_arp_ip4_entry_t *n, *ns = 0;
1273
1274   /* *INDENT-OFF* */
1275   pool_foreach (n, am->ip4_entry_pool, ({
1276     if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
1277       continue;
1278     vec_add1 (ns, n[0]);
1279   }));
1280   /* *INDENT-ON* */
1281
1282   if (ns)
1283     vec_sort_with_function (ns, ip4_arp_entry_sort);
1284   return ns;
1285 }
1286
1287 static clib_error_t *
1288 show_ip4_arp (vlib_main_t * vm,
1289               unformat_input_t * input, vlib_cli_command_t * cmd)
1290 {
1291   vnet_main_t *vnm = vnet_get_main ();
1292   ethernet_arp_main_t *am = &ethernet_arp_main;
1293   ethernet_arp_ip4_entry_t *e, *es;
1294   ethernet_proxy_arp_t *pa;
1295   clib_error_t *error = 0;
1296   u32 sw_if_index;
1297
1298   /* Filter entries by interface if given. */
1299   sw_if_index = ~0;
1300   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1301
1302   es = ip4_neighbor_entries (sw_if_index);
1303   if (es)
1304     {
1305       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1306       vec_foreach (e, es)
1307       {
1308         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1309       }
1310       vec_free (es);
1311     }
1312
1313   if (vec_len (am->proxy_arps))
1314     {
1315       vlib_cli_output (vm, "Proxy arps enabled for:");
1316       vec_foreach (pa, am->proxy_arps)
1317       {
1318         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1319                          pa->fib_index,
1320                          format_ip4_address, &pa->lo_addr,
1321                          format_ip4_address, &pa->hi_addr);
1322       }
1323     }
1324
1325   return error;
1326 }
1327
1328 /*?
1329  * Display all the IPv4 ARP entries.
1330  *
1331  * @cliexpar
1332  * Example of how to display the IPv4 ARP table:
1333  * @cliexstart{show ip arp}
1334  *    Time      FIB        IP4       Flags      Ethernet              Interface
1335  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1336  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1337  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1338  * Proxy arps enabled for:
1339  * Fib_index 0   6.0.0.1 - 6.0.0.11
1340  * @cliexend
1341  ?*/
1342 /* *INDENT-OFF* */
1343 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1344   .path = "show ip arp",
1345   .function = show_ip4_arp,
1346   .short_help = "show ip arp",
1347 };
1348 /* *INDENT-ON* */
1349
1350 typedef struct
1351 {
1352   pg_edit_t l2_type, l3_type;
1353   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1354   pg_edit_t opcode;
1355   struct
1356   {
1357     pg_edit_t ethernet;
1358     pg_edit_t ip4;
1359   } ip4_over_ethernet[2];
1360 } pg_ethernet_arp_header_t;
1361
1362 static inline void
1363 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1364 {
1365   /* Initialize fields that are not bit fields in the IP header. */
1366 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1367   _(l2_type);
1368   _(l3_type);
1369   _(n_l2_address_bytes);
1370   _(n_l3_address_bytes);
1371   _(opcode);
1372   _(ip4_over_ethernet[0].ethernet);
1373   _(ip4_over_ethernet[0].ip4);
1374   _(ip4_over_ethernet[1].ethernet);
1375   _(ip4_over_ethernet[1].ip4);
1376 #undef _
1377 }
1378
1379 uword
1380 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1381 {
1382   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1383   pg_ethernet_arp_header_t *p;
1384   u32 group_index;
1385
1386   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1387                             &group_index);
1388   pg_ethernet_arp_header_init (p);
1389
1390   /* Defaults. */
1391   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1392   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1393   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1394   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1395
1396   if (!unformat (input, "%U: %U/%U -> %U/%U",
1397                  unformat_pg_edit,
1398                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1399                  unformat_pg_edit,
1400                  unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1401                  unformat_pg_edit,
1402                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1403                  unformat_pg_edit,
1404                  unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1405                  unformat_pg_edit,
1406                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1407     {
1408       /* Free up any edits we may have added. */
1409       pg_free_edit_group (s);
1410       return 0;
1411     }
1412   return 1;
1413 }
1414
1415 clib_error_t *
1416 ip4_set_arp_limit (u32 arp_limit)
1417 {
1418   ethernet_arp_main_t *am = &ethernet_arp_main;
1419
1420   am->limit_arp_cache_size = arp_limit;
1421   return 0;
1422 }
1423
1424 /**
1425  * @brief Control Plane hook to remove an ARP entry
1426  */
1427 int
1428 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1429                                   u32 sw_if_index, void *a_arg)
1430 {
1431   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1432   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1433
1434   args.sw_if_index = sw_if_index;
1435   args.flags = ETHERNET_ARP_ARGS_REMOVE;
1436   clib_memcpy (&args.a, a, sizeof (*a));
1437
1438   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1439                                (u8 *) & args, sizeof (args));
1440   return 0;
1441 }
1442
1443 /**
1444  * @brief Internally generated event to flush the ARP cache on an
1445  * interface state change event.
1446  * A flush will remove dynamic ARP entries, and for statics remove the MAC
1447  * address from the corresponding adjacencies.
1448  */
1449 static int
1450 vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
1451                                   u32 sw_if_index, void *a_arg)
1452 {
1453   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1454   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1455
1456   args.sw_if_index = sw_if_index;
1457   args.flags = ETHERNET_ARP_ARGS_FLUSH;
1458   clib_memcpy (&args.a, a, sizeof (*a));
1459
1460   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1461                                (u8 *) & args, sizeof (args));
1462   return 0;
1463 }
1464
1465 /**
1466  * @brief Internally generated event to populate the ARP cache on an
1467  * interface state change event.
1468  * For static entries this will re-source the adjacencies.
1469  *
1470  * @param sw_if_index The interface on which the ARP entires are acted
1471  */
1472 static int
1473 vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
1474                                      u32 sw_if_index, void *a_arg)
1475 {
1476   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1477   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1478
1479   args.sw_if_index = sw_if_index;
1480   args.flags = ETHERNET_ARP_ARGS_POPULATE;
1481   clib_memcpy (&args.a, a, sizeof (*a));
1482
1483   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1484                                (u8 *) & args, sizeof (args));
1485   return 0;
1486 }
1487
1488 /*
1489  * arp_add_del_interface_address
1490  *
1491  * callback when an interface address is added or deleted
1492  */
1493 static void
1494 arp_add_del_interface_address (ip4_main_t * im,
1495                                uword opaque,
1496                                u32 sw_if_index,
1497                                ip4_address_t * address,
1498                                u32 address_length,
1499                                u32 if_address_index, u32 is_del)
1500 {
1501   /*
1502    * Flush the ARP cache of all entries covered by the address
1503    * that is being removed.
1504    */
1505   ethernet_arp_main_t *am = &ethernet_arp_main;
1506   ethernet_arp_ip4_entry_t *e;
1507
1508   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
1509     return;
1510
1511   if (is_del)
1512     {
1513       ethernet_arp_interface_t *eai;
1514       u32 i, *to_delete = 0;
1515       hash_pair_t *pair;
1516
1517       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
1518
1519       /* *INDENT-OFF* */
1520       hash_foreach_pair (pair, eai->arp_entries,
1521       ({
1522         e = pool_elt_at_index(am->ip4_entry_pool,
1523                               pair->value[0]);
1524         if (ip4_destination_matches_route (im, &e->ip4_address,
1525                                            address, address_length))
1526           {
1527             vec_add1 (to_delete, e - am->ip4_entry_pool);
1528           }
1529       }));
1530       /* *INDENT-ON* */
1531
1532       for (i = 0; i < vec_len (to_delete); i++)
1533         {
1534           ethernet_arp_ip4_over_ethernet_address_t delme;
1535           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1536
1537           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1538           delme.ip4.as_u32 = e->ip4_address.as_u32;
1539
1540           vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
1541                                             e->sw_if_index, &delme);
1542         }
1543
1544       vec_free (to_delete);
1545     }
1546 }
1547
1548 static clib_error_t *
1549 ethernet_arp_init (vlib_main_t * vm)
1550 {
1551   ethernet_arp_main_t *am = &ethernet_arp_main;
1552   ip4_main_t *im = &ip4_main;
1553   clib_error_t *error;
1554   pg_node_t *pn;
1555
1556   if ((error = vlib_call_init_function (vm, ethernet_init)))
1557     return error;
1558
1559   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1560
1561   pn = pg_get_node (arp_input_node.index);
1562   pn->unformat_edit = unformat_pg_arp_header;
1563
1564   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1565 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1566   foreach_ethernet_arp_opcode;
1567 #undef _
1568
1569   /* $$$ configurable */
1570   am->limit_arp_cache_size = 50000;
1571
1572   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1573   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1574
1575   /* don't trace ARP error packets */
1576   {
1577     vlib_node_runtime_t *rt =
1578       vlib_node_get_runtime (vm, arp_input_node.index);
1579
1580 #define _(a,b)                                  \
1581     vnet_pcap_drop_trace_filter_add_del         \
1582         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1583          1 /* is_add */);
1584     foreach_ethernet_arp_error
1585 #undef _
1586   }
1587
1588   ip4_add_del_interface_address_callback_t cb;
1589   cb.function = arp_add_del_interface_address;
1590   cb.function_opaque = 0;
1591   vec_add1 (im->add_del_interface_address_callbacks, cb);
1592
1593   return 0;
1594 }
1595
1596 VLIB_INIT_FUNCTION (ethernet_arp_init);
1597
1598 static void
1599 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
1600 {
1601   ethernet_arp_main_t *am = &ethernet_arp_main;
1602
1603   fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
1604   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
1605   pool_put (am->ip4_entry_pool, e);
1606 }
1607
1608 static inline int
1609 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1610                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1611                                            * args)
1612 {
1613   ethernet_arp_main_t *am = &ethernet_arp_main;
1614   ethernet_arp_ip4_entry_t *e;
1615   ethernet_arp_interface_t *eai;
1616
1617   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1618
1619   e = arp_entry_find (eai, &args->a.ip4);
1620
1621   if (NULL != e)
1622     {
1623       adj_nbr_walk_nh4 (e->sw_if_index,
1624                         &e->ip4_address, arp_mk_incomplete_walk, e);
1625       arp_entry_free (eai, e);
1626     }
1627
1628   return 0;
1629 }
1630
1631 static int
1632 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1633                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1634                                            * args)
1635 {
1636   ethernet_arp_main_t *am = &ethernet_arp_main;
1637   ethernet_arp_ip4_entry_t *e;
1638   ethernet_arp_interface_t *eai;
1639
1640   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1641
1642   e = arp_entry_find (eai, &args->a.ip4);
1643
1644   if (NULL != e)
1645     {
1646       adj_nbr_walk_nh4 (e->sw_if_index,
1647                         &e->ip4_address, arp_mk_incomplete_walk, e);
1648
1649       /*
1650        * The difference between flush and unset, is that an unset
1651        * means delete for static and dynamic entries. A flush
1652        * means delete only for dynamic. Flushing is what the DP
1653        * does in response to interface events. unset is only done
1654        * by the control plane.
1655        */
1656       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
1657         {
1658           arp_entry_free (eai, e);
1659         }
1660     }
1661   return (0);
1662 }
1663
1664 static int
1665 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
1666                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
1667                                               * args)
1668 {
1669   ethernet_arp_main_t *am = &ethernet_arp_main;
1670   ethernet_arp_ip4_entry_t *e;
1671   ethernet_arp_interface_t *eai;
1672
1673   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1674
1675   e = arp_entry_find (eai, &args->a.ip4);
1676
1677   if (NULL != e)
1678     {
1679       adj_nbr_walk_nh4 (e->sw_if_index,
1680                         &e->ip4_address, arp_mk_complete_walk, e);
1681     }
1682   return (0);
1683 }
1684
1685 static void
1686 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
1687                                     * a)
1688 {
1689   vnet_main_t *vm = vnet_get_main ();
1690   ASSERT (os_get_cpu_number () == 0);
1691
1692   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
1693     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
1694   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
1695     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
1696   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
1697     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
1698   else
1699     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
1700 }
1701
1702 /**
1703  * @brief Invoked when the interface's admin state changes
1704  */
1705 static clib_error_t *
1706 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
1707                                    u32 sw_if_index, u32 flags)
1708 {
1709   ethernet_arp_main_t *am = &ethernet_arp_main;
1710   ethernet_arp_ip4_entry_t *e;
1711   u32 i, *to_delete = 0;
1712
1713   /* *INDENT-OFF* */
1714   pool_foreach (e, am->ip4_entry_pool,
1715   ({
1716     if (e->sw_if_index == sw_if_index)
1717       vec_add1 (to_delete,
1718                 e - am->ip4_entry_pool);
1719   }));
1720   /* *INDENT-ON* */
1721
1722   for (i = 0; i < vec_len (to_delete); i++)
1723     {
1724       ethernet_arp_ip4_over_ethernet_address_t delme;
1725       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1726
1727       clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1728       delme.ip4.as_u32 = e->ip4_address.as_u32;
1729
1730       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1731         {
1732           vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1733         }
1734       else
1735         {
1736           vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1737         }
1738
1739     }
1740   vec_free (to_delete);
1741
1742   return 0;
1743 }
1744
1745 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
1746
1747 static void
1748 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
1749 {
1750   u8 old;
1751   int i;
1752
1753   for (i = 3; i >= 0; i--)
1754     {
1755       old = a->ip4.as_u8[i];
1756       a->ip4.as_u8[i] += 1;
1757       if (old < a->ip4.as_u8[i])
1758         break;
1759     }
1760
1761   for (i = 5; i >= 0; i--)
1762     {
1763       old = a->ethernet[i];
1764       a->ethernet[i] += 1;
1765       if (old < a->ethernet[i])
1766         break;
1767     }
1768 }
1769
1770 int
1771 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
1772                                 u32 sw_if_index, void *a_arg, int is_static)
1773 {
1774   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1775   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1776
1777   args.sw_if_index = sw_if_index;
1778   args.is_static = is_static;
1779   args.flags = 0;
1780   clib_memcpy (&args.a, a, sizeof (*a));
1781
1782   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1783                                (u8 *) & args, sizeof (args));
1784   return 0;
1785 }
1786
1787 int
1788 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
1789                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
1790 {
1791   ethernet_arp_main_t *am = &ethernet_arp_main;
1792   ethernet_proxy_arp_t *pa;
1793   u32 found_at_index = ~0;
1794
1795   vec_foreach (pa, am->proxy_arps)
1796   {
1797     if (pa->lo_addr == lo_addr->as_u32
1798         && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index)
1799       {
1800         found_at_index = pa - am->proxy_arps;
1801         break;
1802       }
1803   }
1804
1805   if (found_at_index != ~0)
1806     {
1807       /* Delete, otherwise it's already in the table */
1808       if (is_del)
1809         vec_delete (am->proxy_arps, 1, found_at_index);
1810       return 0;
1811     }
1812   /* delete, no such entry */
1813   if (is_del)
1814     return VNET_API_ERROR_NO_SUCH_ENTRY;
1815
1816   /* add, not in table */
1817   vec_add2 (am->proxy_arps, pa, 1);
1818   pa->lo_addr = lo_addr->as_u32;
1819   pa->hi_addr = hi_addr->as_u32;
1820   pa->fib_index = fib_index;
1821   return 0;
1822 }
1823
1824 /*
1825  * Remove any proxy arp entries asdociated with the
1826  * specificed fib.
1827  */
1828 int
1829 vnet_proxy_arp_fib_reset (u32 fib_id)
1830 {
1831   ip4_main_t *im = &ip4_main;
1832   ethernet_arp_main_t *am = &ethernet_arp_main;
1833   ethernet_proxy_arp_t *pa;
1834   u32 *entries_to_delete = 0;
1835   u32 fib_index;
1836   uword *p;
1837   int i;
1838
1839   p = hash_get (im->fib_index_by_table_id, fib_id);
1840   if (!p)
1841     return VNET_API_ERROR_NO_SUCH_ENTRY;
1842   fib_index = p[0];
1843
1844   vec_foreach (pa, am->proxy_arps)
1845   {
1846     if (pa->fib_index == fib_index)
1847       {
1848         vec_add1 (entries_to_delete, pa - am->proxy_arps);
1849       }
1850   }
1851
1852   for (i = 0; i < vec_len (entries_to_delete); i++)
1853     {
1854       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1855     }
1856
1857   vec_free (entries_to_delete);
1858
1859   return 0;
1860 }
1861
1862 static clib_error_t *
1863 ip_arp_add_del_command_fn (vlib_main_t * vm,
1864                            unformat_input_t * input, vlib_cli_command_t * cmd)
1865 {
1866   vnet_main_t *vnm = vnet_get_main ();
1867   u32 sw_if_index;
1868   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1869   int addr_valid = 0;
1870   int is_del = 0;
1871   int count = 1;
1872   u32 fib_index = 0;
1873   u32 fib_id;
1874   int is_static = 0;
1875   int is_proxy = 0;
1876
1877   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1878     {
1879       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1880       if (unformat (input, "%U %U %U",
1881                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1882                     unformat_ip4_address, &addr.ip4,
1883                     unformat_ethernet_address, &addr.ethernet))
1884         addr_valid = 1;
1885
1886       else if (unformat (input, "delete") || unformat (input, "del"))
1887         is_del = 1;
1888
1889       else if (unformat (input, "static"))
1890         is_static = 1;
1891
1892       else if (unformat (input, "count %d", &count))
1893         ;
1894
1895       else if (unformat (input, "fib-id %d", &fib_id))
1896         {
1897           ip4_main_t *im = &ip4_main;
1898           uword *p = hash_get (im->fib_index_by_table_id, fib_id);
1899           if (!p)
1900             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
1901           fib_index = p[0];
1902         }
1903
1904       else if (unformat (input, "proxy %U - %U",
1905                          unformat_ip4_address, &lo_addr.ip4,
1906                          unformat_ip4_address, &hi_addr.ip4))
1907         is_proxy = 1;
1908       else
1909         break;
1910     }
1911
1912   if (is_proxy)
1913     {
1914       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
1915                                      fib_index, is_del);
1916       return 0;
1917     }
1918
1919   if (addr_valid)
1920     {
1921       int i;
1922
1923       for (i = 0; i < count; i++)
1924         {
1925           if (is_del == 0)
1926             {
1927               uword event_type, *event_data = 0;
1928
1929               /* Park the debug CLI until the arp entry is installed */
1930               vnet_register_ip4_arp_resolution_event
1931                 (vnm, &addr.ip4, vlib_current_process (vm),
1932                  1 /* type */ , 0 /* data */ );
1933
1934               vnet_arp_set_ip4_over_ethernet
1935                 (vnm, sw_if_index, &addr, is_static);
1936
1937               vlib_process_wait_for_event (vm);
1938               event_type = vlib_process_get_events (vm, &event_data);
1939               vec_reset_length (event_data);
1940               if (event_type != 1)
1941                 clib_warning ("event type %d unexpected", event_type);
1942             }
1943           else
1944             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
1945
1946           increment_ip4_and_mac_address (&addr);
1947         }
1948     }
1949   else
1950     {
1951       return clib_error_return (0, "unknown input `%U'",
1952                                 format_unformat_error, input);
1953     }
1954
1955   return 0;
1956 }
1957
1958 /* *INDENT-OFF* */
1959 /*?
1960  * Add or delete IPv4 ARP cache entries.
1961  *
1962  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
1963  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
1964  * any order and combination.
1965  *
1966  * @cliexpar
1967  * @parblock
1968  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
1969  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
1970  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1971  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
1972  *
1973  * To add or delete an IPv4 ARP cache entry to or from a specific fib
1974  * table:
1975  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1976  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1977  *
1978  * Add or delete IPv4 static ARP cache entries as follows:
1979  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1980  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1981  *
1982  * For testing / debugging purposes, the 'set ip arp' command can add or
1983  * delete multiple entries. Supply the 'count N' parameter:
1984  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1985  * @endparblock
1986  ?*/
1987 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1988   .path = "set ip arp",
1989   .short_help =
1990   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1991   .function = ip_arp_add_del_command_fn,
1992 };
1993 /* *INDENT-ON* */
1994
1995 static clib_error_t *
1996 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1997                               unformat_input_t *
1998                               input, vlib_cli_command_t * cmd)
1999 {
2000   vnet_main_t *vnm = vnet_get_main ();
2001   u32 sw_if_index;
2002   vnet_sw_interface_t *si;
2003   int enable = 0;
2004   int intfc_set = 0;
2005
2006   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2007     {
2008       if (unformat (input, "%U", unformat_vnet_sw_interface,
2009                     vnm, &sw_if_index))
2010         intfc_set = 1;
2011       else if (unformat (input, "enable") || unformat (input, "on"))
2012         enable = 1;
2013       else if (unformat (input, "disable") || unformat (input, "off"))
2014         enable = 0;
2015       else
2016         break;
2017     }
2018
2019   if (intfc_set == 0)
2020     return clib_error_return (0, "unknown input '%U'",
2021                               format_unformat_error, input);
2022
2023   si = vnet_get_sw_interface (vnm, sw_if_index);
2024   ASSERT (si);
2025   if (enable)
2026     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2027   else
2028     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2029
2030   return 0;
2031 }
2032
2033 /* *INDENT-OFF* */
2034 /*?
2035  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2036  * requests for the indicated address range. Multiple proxy-arp
2037  * ranges may be provisioned.
2038  *
2039  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2040  * Also, the underlying implementation has not been performance-tuned.
2041  * Avoid creating an unnecessarily large set of ranges.
2042  *
2043  * @cliexpar
2044  * To enable proxy arp on a range of addresses, use:
2045  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2046  * Append 'del' to delete a range of proxy ARP addresses:
2047  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2048  * You must then specifically enable proxy arp on individual interfaces:
2049  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2050  * To disable proxy arp on an individual interface:
2051  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2052  ?*/
2053 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2054   .path = "set interface proxy-arp",
2055   .short_help =
2056   "set interface proxy-arp <intfc> [enable|disable]",
2057   .function = set_int_proxy_arp_command_fn,
2058 };
2059 /* *INDENT-ON* */
2060
2061
/*
 * ARP/ND termination in an L2 bridge domain, based on the per-BD
 * IP4/IP6 to MAC hash tables mac_by_ip4 and mac_by_ip6.
 *
 * Next-node slots of the arp-term-l2bd node.
 */
typedef enum
{
  ARP_TERM_NEXT_L2_OUTPUT = 0,  /* hand the packet to "l2-output" */
  ARP_TERM_NEXT_DROP = 1,       /* hand the packet to "error-drop" */
  ARP_TERM_N_NEXT = 2           /* number of next-node slots */
} arp_term_next_t;
2072
2073 u32 arp_term_next_node_index[32];
2074
2075 static uword
2076 arp_term_l2bd (vlib_main_t * vm,
2077                vlib_node_runtime_t * node, vlib_frame_t * frame)
2078 {
2079   l2input_main_t *l2im = &l2input_main;
2080   u32 n_left_from, next_index, *from, *to_next;
2081   u32 n_replies_sent = 0;
2082   u16 last_bd_index = ~0;
2083   l2_bridge_domain_t *last_bd_config = 0;
2084   l2_input_config_t *cfg0;
2085
2086   from = vlib_frame_vector_args (frame);
2087   n_left_from = frame->n_vectors;
2088   next_index = node->cached_next_index;
2089
2090   while (n_left_from > 0)
2091     {
2092       u32 n_left_to_next;
2093
2094       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2095
2096       while (n_left_from > 0 && n_left_to_next > 0)
2097         {
2098           vlib_buffer_t *p0;
2099           ethernet_header_t *eth0;
2100           ethernet_arp_header_t *arp0;
2101           ip6_header_t *iph0;
2102           u8 *l3h0;
2103           u32 pi0, error0, next0, sw_if_index0;
2104           u16 ethertype0;
2105           u16 bd_index0;
2106           u32 ip0;
2107           u8 *macp0;
2108
2109           pi0 = from[0];
2110           to_next[0] = pi0;
2111           from += 1;
2112           to_next += 1;
2113           n_left_from -= 1;
2114           n_left_to_next -= 1;
2115
2116           p0 = vlib_get_buffer (vm, pi0);
2117           eth0 = vlib_buffer_get_current (p0);
2118           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2119           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2120           arp0 = (ethernet_arp_header_t *) l3h0;
2121
2122           if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
2123                              (arp0->opcode !=
2124                               clib_host_to_net_u16
2125                               (ETHERNET_ARP_OPCODE_request))))
2126             goto check_ip6_nd;
2127
2128           /* Must be ARP request packet here */
2129           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2130                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2131             {
2132               u8 *t0 = vlib_add_trace (vm, node, p0,
2133                                        sizeof (ethernet_arp_input_trace_t));
2134               clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
2135             }
2136
2137           error0 = ETHERNET_ARP_ERROR_replies_sent;
2138           error0 =
2139             (arp0->l2_type !=
2140              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2141              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2142           error0 =
2143             (arp0->l3_type !=
2144              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2145              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2146
2147           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2148
2149           if (error0)
2150             goto drop;
2151
2152           /* Trash ARP packets whose ARP-level source addresses do not
2153              match their L2-frame-level source addresses  */
2154           if (PREDICT_FALSE
2155               (memcmp
2156                (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
2157                 sizeof (eth0->src_address))))
2158             {
2159               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2160               goto drop;
2161             }
2162
2163           /* Check if anyone want ARP request events for L2 BDs */
2164           {
2165             pending_resolution_t *mc;
2166             ethernet_arp_main_t *am = &ethernet_arp_main;
2167             uword *p = hash_get (am->mac_changes_by_address, 0);
2168             if (p && (vnet_buffer (p0)->l2.shg == 0))
2169               {                 // Only SHG 0 interface which is more likely local
2170                 u32 next_index = p[0];
2171                 while (next_index != (u32) ~ 0)
2172                   {
2173                     int (*fp) (u32, u8 *, u32, u32);
2174                     int rv = 1;
2175                     mc = pool_elt_at_index (am->mac_changes, next_index);
2176                     fp = mc->data_callback;
2177                     /* Call the callback, return 1 to suppress dup events */
2178                     if (fp)
2179                       rv = (*fp) (mc->data,
2180                                   arp0->ip4_over_ethernet[0].ethernet,
2181                                   sw_if_index0,
2182                                   arp0->ip4_over_ethernet[0].ip4.as_u32);
2183                     /* Signal the resolver process */
2184                     if (rv == 0)
2185                       vlib_process_signal_event (vm, mc->node_index,
2186                                                  mc->type_opaque, mc->data);
2187                     next_index = mc->next_index;
2188                   }
2189               }
2190           }
2191
2192           /* lookup BD mac_by_ip4 hash table for MAC entry */
2193           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2194           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2195           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2196                              || (last_bd_index == (u16) ~ 0)))
2197             {
2198               last_bd_index = bd_index0;
2199               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2200             }
2201           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2202
2203           if (PREDICT_FALSE (!macp0))
2204             goto next_l2_feature;       /* MAC not found */
2205
2206           /* MAC found, send ARP reply -
2207              Convert ARP request packet to ARP reply */
2208           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2209           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2210           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2211           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
2212           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
2213           clib_memcpy (eth0->src_address, macp0, 6);
2214           n_replies_sent += 1;
2215
2216         output_response:
2217           /* For BVI, need to use l2-fwd node to send ARP reply as
2218              l2-output node cannot output packet to BVI properly */
2219           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2220           if (PREDICT_FALSE (cfg0->bvi))
2221             {
2222               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2223               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2224               goto next_l2_feature;
2225             }
2226
2227           /* Send ARP/ND reply back out input interface through l2-output */
2228           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2229           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2230           /* Note that output to VXLAN tunnel will fail due to SHG which
2231              is probably desireable since ARP termination is not intended
2232              for ARP requests from other hosts. If output to VXLAN tunnel is
2233              required, however, can just clear the SHG in packet as follows:
2234              vnet_buffer(p0)->l2.shg = 0;         */
2235           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2236                                            to_next, n_left_to_next, pi0,
2237                                            next0);
2238           continue;
2239
2240         check_ip6_nd:
2241           /* IP6 ND event notification or solicitation handling to generate
2242              local response instead of flooding */
2243           iph0 = (ip6_header_t *) l3h0;
2244           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2245                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2246                              !ip6_address_is_unspecified
2247                              (&iph0->src_address)))
2248             {
2249               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2250               if (vnet_ip6_nd_term
2251                   (vm, node, p0, eth0, iph0, sw_if_index0,
2252                    vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
2253                 goto output_response;
2254             }
2255
2256         next_l2_feature:
2257           {
2258             u32 feature_bitmap0 =
2259               vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
2260             vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
2261             next0 =
2262               feat_bitmap_get_next_node_index (arp_term_next_node_index,
2263                                                feature_bitmap0);
2264             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2265                                              to_next, n_left_to_next,
2266                                              pi0, next0);
2267             continue;
2268           }
2269
2270         drop:
2271           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2272               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2273                arp0->ip4_over_ethernet[1].ip4.as_u32))
2274             {
2275               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2276             }
2277           next0 = ARP_TERM_NEXT_DROP;
2278           p0->error = node->errors[error0];
2279
2280           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2281                                            to_next, n_left_to_next, pi0,
2282                                            next0);
2283         }
2284
2285       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2286     }
2287
2288   vlib_error_count (vm, node->node_index,
2289                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2290   return frame->n_vectors;
2291 }
2292
2293 /* *INDENT-OFF* */
2294 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2295   .function = arp_term_l2bd,
2296   .name = "arp-term-l2bd",
2297   .vector_size = sizeof (u32),
2298   .n_errors = ETHERNET_ARP_N_ERROR,
2299   .error_strings = ethernet_arp_error_strings,
2300   .n_next_nodes = ARP_TERM_N_NEXT,
2301   .next_nodes = {
2302     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2303     [ARP_TERM_NEXT_DROP] = "error-drop",
2304   },
2305   .format_buffer = format_ethernet_arp_header,
2306   .format_trace = format_arp_term_input_trace,
2307 };
2308 /* *INDENT-ON* */
2309
2310 clib_error_t *
2311 arp_term_init (vlib_main_t * vm)
2312 {
2313   // Initialize the feature next-node indexes
2314   feat_bitmap_init_next_nodes (vm,
2315                                arp_term_l2bd_node.index,
2316                                L2INPUT_N_FEAT,
2317                                l2input_get_feat_names (),
2318                                arp_term_next_node_index);
2319   return 0;
2320 }
2321
2322 VLIB_INIT_FUNCTION (arp_term_init);
2323
2324 void
2325 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2326 {
2327   if (e->sw_if_index == sw_if_index)
2328     {
2329       adj_nbr_walk_nh4 (e->sw_if_index,
2330                         &e->ip4_address, arp_mk_complete_walk, e);
2331     }
2332 }
2333
2334 void
2335 ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index)
2336 {
2337   ethernet_arp_main_t *am = &ethernet_arp_main;
2338   ethernet_arp_ip4_entry_t *e;
2339
2340   /* *INDENT-OFF* */
2341   pool_foreach (e, am->ip4_entry_pool,
2342   ({
2343     change_arp_mac (sw_if_index, e);
2344   }));
2345   /* *INDENT-ON* */
2346 }
2347
2348 /*
2349  * fd.io coding-style-patch-verification: ON
2350  *
2351  * Local Variables:
2352  * eval: (c-set-style "gnu")
2353  * End:
2354  */