GRE tests and fixes
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip6.h>
20 #include <vnet/ethernet/ethernet.h>
21 #include <vnet/ethernet/arp_packet.h>
22 #include <vnet/l2/l2_input.h>
23 #include <vppinfra/mhash.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/adj/adj_nbr.h>
26 #include <vnet/mpls/mpls.h>
27
28 /**
29  * @file
30  * @brief IPv4 ARP.
31  *
32  * This file contains code to manage the IPv4 ARP tables (IP Address
33  * to MAC Address lookup).
34  */
35
36
37 void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
38
39 typedef struct
40 {
41   u32 sw_if_index;
42   ip4_address_t ip4_address;
43
44   u8 ethernet_address[6];
45
46   u16 flags;
47 #define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC  (1 << 0)
48 #define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1)
49
50   u64 cpu_time_last_updated;
51
52   /**
53    * The index of the adj-fib entry created
54    */
55   fib_node_index_t fib_entry_index;
56 } ethernet_arp_ip4_entry_t;
57
58 /**
59  * @brief Per-interface ARP configuration and state
60  */
61 typedef struct ethernet_arp_interface_t_
62 {
63   /**
64    * Hash table of ARP entries.
65    * Since this hash table is per-interface, the key is only the IPv4 address.
66    */
67   uword *arp_entries;
68 } ethernet_arp_interface_t;
69
70 typedef struct
71 {
72   u32 lo_addr;
73   u32 hi_addr;
74   u32 fib_index;
75 } ethernet_proxy_arp_t;
76
77 typedef struct
78 {
79   u32 next_index;
80   uword node_index;
81   uword type_opaque;
82   uword data;
83   /* Used for arp event notification only */
84   void *data_callback;
85   u32 pid;
86 } pending_resolution_t;
87
88 typedef struct
89 {
90   /* Hash tables mapping name to opcode. */
91   uword *opcode_by_name;
92
93   /* lite beer "glean" adjacency handling */
94   uword *pending_resolutions_by_address;
95   pending_resolution_t *pending_resolutions;
96
97   /* Mac address change notification */
98   uword *mac_changes_by_address;
99   pending_resolution_t *mac_changes;
100
101   ethernet_arp_ip4_entry_t *ip4_entry_pool;
102
103   /* ARP attack mitigation */
104   u32 arp_delete_rotor;
105   u32 limit_arp_cache_size;
106
107   /** Per interface state */
108   ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
109
110   /* Proxy arp vector */
111   ethernet_proxy_arp_t *proxy_arps;
112 } ethernet_arp_main_t;
113
114 static ethernet_arp_main_t ethernet_arp_main;
115
116 typedef struct
117 {
118   u32 sw_if_index;
119   ethernet_arp_ip4_over_ethernet_address_t a;
120   int is_static;
121   int flags;
122 #define ETHERNET_ARP_ARGS_REMOVE (1<<0)
123 #define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
124 #define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
125 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
126
127 static void
128 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
129                                     * a);
130
131 static u8 *
132 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
133 {
134   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
135   char *t = 0;
136   switch (h)
137     {
138 #define _(n,f) case n: t = #f; break;
139       foreach_ethernet_arp_hardware_type;
140 #undef _
141
142     default:
143       return format (s, "unknown 0x%x", h);
144     }
145
146   return format (s, "%s", t);
147 }
148
149 static u8 *
150 format_ethernet_arp_opcode (u8 * s, va_list * va)
151 {
152   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
153   char *t = 0;
154   switch (o)
155     {
156 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
157       foreach_ethernet_arp_opcode;
158 #undef _
159
160     default:
161       return format (s, "unknown 0x%x", o);
162     }
163
164   return format (s, "%s", t);
165 }
166
167 static uword
168 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
169                                               va_list * args)
170 {
171   int *result = va_arg (*args, int *);
172   ethernet_arp_main_t *am = &ethernet_arp_main;
173   int x, i;
174
175   /* Numeric opcode. */
176   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
177     {
178       if (x >= (1 << 16))
179         return 0;
180       *result = x;
181       return 1;
182     }
183
184   /* Named type. */
185   if (unformat_user (input, unformat_vlib_number_by_name,
186                      am->opcode_by_name, &i))
187     {
188       *result = i;
189       return 1;
190     }
191
192   return 0;
193 }
194
195 static uword
196 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
197                                              va_list * args)
198 {
199   int *result = va_arg (*args, int *);
200   if (!unformat_user
201       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
202     return 0;
203
204   *result = clib_host_to_net_u16 ((u16) * result);
205   return 1;
206 }
207
208 static u8 *
209 format_ethernet_arp_header (u8 * s, va_list * va)
210 {
211   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
212   u32 max_header_bytes = va_arg (*va, u32);
213   uword indent;
214   u16 l2_type, l3_type;
215
216   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
217     return format (s, "ARP header truncated");
218
219   l2_type = clib_net_to_host_u16 (a->l2_type);
220   l3_type = clib_net_to_host_u16 (a->l3_type);
221
222   indent = format_get_indent (s);
223
224   s = format (s, "%U, type %U/%U, address size %d/%d",
225               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
226               format_ethernet_arp_hardware_type, l2_type,
227               format_ethernet_type, l3_type,
228               a->n_l2_address_bytes, a->n_l3_address_bytes);
229
230   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
231       && l3_type == ETHERNET_TYPE_IP4)
232     {
233       s = format (s, "\n%U%U/%U -> %U/%U",
234                   format_white_space, indent,
235                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
236                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
237                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
238                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
239     }
240   else
241     {
242       uword n2 = a->n_l2_address_bytes;
243       uword n3 = a->n_l3_address_bytes;
244       s = format (s, "\n%U%U/%U -> %U/%U",
245                   format_white_space, indent,
246                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
247                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
248                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
249                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
250     }
251
252   return s;
253 }
254
255 static u8 *
256 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
257 {
258   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
259   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
260   vnet_sw_interface_t *si;
261   u8 *flags = 0;
262
263   if (!e)
264     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
265                    "Flags", "Ethernet", "Interface");
266
267   si = vnet_get_sw_interface (vnm, e->sw_if_index);
268
269   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
270     flags = format (flags, "S");
271
272   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
273     flags = format (flags, "D");
274
275   s = format (s, "%=12U%=16U%=6s%=20U%=24U",
276               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
277               format_ip4_address, &e->ip4_address,
278               flags ? (char *) flags : "",
279               format_ethernet_address, e->ethernet_address,
280               format_vnet_sw_interface_name, vnm, si);
281
282   vec_free (flags);
283   return s;
284 }
285
286 typedef struct
287 {
288   u8 packet_data[64];
289 } ethernet_arp_input_trace_t;
290
291 static u8 *
292 format_ethernet_arp_input_trace (u8 * s, va_list * va)
293 {
294   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
295   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
296   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
297
298   s = format (s, "%U",
299               format_ethernet_arp_header,
300               t->packet_data, sizeof (t->packet_data));
301
302   return s;
303 }
304
305 static u8 *
306 format_arp_term_input_trace (u8 * s, va_list * va)
307 {
308   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
309   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
310   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
311
312   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
313      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
314      - for ip6, the first nibble has value of 6. */
315   s = format (s, "%U", t->packet_data[0] == 0 ?
316               format_ethernet_arp_header : format_ip6_header,
317               t->packet_data, sizeof (t->packet_data));
318
319   return s;
320 }
321
322 static void
323 arp_nbr_probe (ip_adjacency_t * adj)
324 {
325   vnet_main_t *vnm = vnet_get_main ();
326   ip4_main_t *im = &ip4_main;
327   ip_interface_address_t *ia;
328   ethernet_arp_header_t *h;
329   vnet_hw_interface_t *hi;
330   vnet_sw_interface_t *si;
331   ip4_address_t *src;
332   vlib_buffer_t *b;
333   vlib_main_t *vm;
334   u32 bi = 0;
335
336   vm = vlib_get_main ();
337
338   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
339
340   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
341     {
342       return;
343     }
344
345   src =
346     ip4_interface_address_matching_destination (im,
347                                                 &adj->sub_type.nbr.next_hop.
348                                                 ip4,
349                                                 adj->rewrite_header.
350                                                 sw_if_index, &ia);
351   if (!src)
352     {
353       return;
354     }
355
356   h =
357     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
358                                      &bi);
359
360   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
361
362   clib_memcpy (h->ip4_over_ethernet[0].ethernet,
363                hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
364
365   h->ip4_over_ethernet[0].ip4 = src[0];
366   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
367
368   b = vlib_get_buffer (vm, bi);
369   vnet_buffer (b)->sw_if_index[VLIB_RX] =
370     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
371
372   /* Add encapsulation string for software interface (e.g. ethernet header). */
373   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
374   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
375
376   {
377     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
378     u32 *to_next = vlib_frame_vector_args (f);
379     to_next[0] = bi;
380     f->n_vectors = 1;
381     vlib_put_frame_to_node (vm, hi->output_node_index, f);
382   }
383 }
384
385 static void
386 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
387 {
388   adj_nbr_update_rewrite
389     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
390      ethernet_build_rewrite (vnet_get_main (),
391                              e->sw_if_index,
392                              adj_get_link_type (ai), e->ethernet_address));
393 }
394
395 static void
396 arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
397 {
398   adj_nbr_update_rewrite
399     (ai,
400      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
401      ethernet_build_rewrite (vnet_get_main (),
402                              e->sw_if_index,
403                              VNET_LINK_ARP,
404                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
405 }
406
407 static ethernet_arp_ip4_entry_t *
408 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
409 {
410   ethernet_arp_main_t *am = &ethernet_arp_main;
411   ethernet_arp_ip4_entry_t *e = NULL;
412   uword *p;
413
414   if (NULL != eai->arp_entries)
415     {
416       p = hash_get (eai->arp_entries, addr->as_u32);
417       if (!p)
418         return (NULL);
419
420       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
421     }
422
423   return (e);
424 }
425
426 static adj_walk_rc_t
427 arp_mk_complete_walk (adj_index_t ai, void *ctx)
428 {
429   ethernet_arp_ip4_entry_t *e = ctx;
430
431   arp_mk_complete (ai, e);
432
433   return (ADJ_WALK_RC_CONTINUE);
434 }
435
436 static adj_walk_rc_t
437 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
438 {
439   ethernet_arp_ip4_entry_t *e = ctx;
440
441   arp_mk_incomplete (ai, e);
442
443   return (ADJ_WALK_RC_CONTINUE);
444 }
445
446 void
447 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
448 {
449   ethernet_arp_main_t *am = &ethernet_arp_main;
450   ethernet_arp_interface_t *arp_int;
451   ethernet_arp_ip4_entry_t *e;
452   ip_adjacency_t *adj;
453
454   adj = adj_get (ai);
455
456   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
457   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
458   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
459
460   if (NULL != e)
461     {
462       adj_nbr_walk_nh4 (sw_if_index,
463                         &e->ip4_address, arp_mk_complete_walk, e);
464     }
465   else
466     {
467       /*
468        * no matching ARP entry.
469        * construct the rewire required to for an ARP packet, and stick
470        * that in the adj's pipe to smoke.
471        */
472       adj_nbr_update_rewrite (ai,
473                               ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
474                               ethernet_build_rewrite (vnm,
475                                                       sw_if_index,
476                                                       VNET_LINK_ARP,
477                                                       VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
478
479       /*
480        * since the FIB has added this adj for a route, it makes sense it may
481        * want to forward traffic sometime soon. Let's send a speculative ARP.
482        * just one. If we were to do periodically that wouldn't be bad either,
483        * but that's more code than i'm prepared to write at this time for
484        * relatively little reward.
485        */
486       arp_nbr_probe (adj);
487     }
488 }
489
490 int
491 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
492                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
493                                          * args)
494 {
495   ethernet_arp_ip4_entry_t *e = 0;
496   ethernet_arp_main_t *am = &ethernet_arp_main;
497   ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
498   vlib_main_t *vm = vlib_get_main ();
499   int make_new_arp_cache_entry = 1;
500   uword *p;
501   pending_resolution_t *pr, *mc;
502   ethernet_arp_interface_t *arp_int;
503   int is_static = args->is_static;
504   u32 sw_if_index = args->sw_if_index;
505
506   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
507
508   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
509
510   if (NULL != arp_int->arp_entries)
511     {
512       p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
513       if (p)
514         {
515           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
516
517           /* Refuse to over-write static arp. */
518           if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
519             return -2;
520           make_new_arp_cache_entry = 0;
521         }
522     }
523
524   if (make_new_arp_cache_entry)
525     {
526       fib_prefix_t pfx = {
527         .fp_len = 32,
528         .fp_proto = FIB_PROTOCOL_IP4,
529         .fp_addr = {
530                     .ip4 = a->ip4,
531                     }
532         ,
533       };
534       u32 fib_index;
535
536       pool_get (am->ip4_entry_pool, e);
537
538       if (NULL == arp_int->arp_entries)
539         {
540           arp_int->arp_entries = hash_create (0, sizeof (u32));
541         }
542
543       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
544
545       e->sw_if_index = sw_if_index;
546       e->ip4_address = a->ip4;
547       clib_memcpy (e->ethernet_address,
548                    a->ethernet, sizeof (e->ethernet_address));
549
550       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
551       e->fib_entry_index =
552         fib_table_entry_update_one_path (fib_index,
553                                          &pfx,
554                                          FIB_SOURCE_ADJ,
555                                          FIB_ENTRY_FLAG_ATTACHED,
556                                          FIB_PROTOCOL_IP4,
557                                          &pfx.fp_addr,
558                                          e->sw_if_index,
559                                          ~0,
560                                          1,
561                                          MPLS_LABEL_INVALID,
562                                          FIB_ROUTE_PATH_FLAG_NONE);
563     }
564   else
565     {
566       /*
567        * prevent a DoS attack from the data-plane that
568        * spams us with no-op updates to the MAC address
569        */
570       if (0 == memcmp (e->ethernet_address,
571                        a->ethernet, sizeof (e->ethernet_address)))
572         return -1;
573
574       /* Update time stamp and ethernet address. */
575       clib_memcpy (e->ethernet_address, a->ethernet,
576                    sizeof (e->ethernet_address));
577     }
578
579   e->cpu_time_last_updated = clib_cpu_time_now ();
580   if (is_static)
581     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
582   else
583     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
584
585   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
586
587   /* Customer(s) waiting for this address to be resolved? */
588   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
589   if (p)
590     {
591       u32 next_index;
592       next_index = p[0];
593
594       while (next_index != (u32) ~ 0)
595         {
596           pr = pool_elt_at_index (am->pending_resolutions, next_index);
597           vlib_process_signal_event (vm, pr->node_index,
598                                      pr->type_opaque, pr->data);
599           next_index = pr->next_index;
600           pool_put (am->pending_resolutions, pr);
601         }
602
603       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
604     }
605
606   /* Customer(s) requesting ARP event for this address? */
607   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
608   if (p)
609     {
610       u32 next_index;
611       next_index = p[0];
612
613       while (next_index != (u32) ~ 0)
614         {
615           int (*fp) (u32, u8 *, u32, u32);
616           int rv = 1;
617           mc = pool_elt_at_index (am->mac_changes, next_index);
618           fp = mc->data_callback;
619
620           /* Call the user's data callback, return 1 to suppress dup events */
621           if (fp)
622             rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
623
624           /*
625            * Signal the resolver process, as long as the user
626            * says they want to be notified
627            */
628           if (rv == 0)
629             vlib_process_signal_event (vm, mc->node_index,
630                                        mc->type_opaque, mc->data);
631           next_index = mc->next_index;
632         }
633     }
634
635   return 0;
636 }
637
638 void
639 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
640                                         void *address_arg,
641                                         uword node_index,
642                                         uword type_opaque, uword data)
643 {
644   ethernet_arp_main_t *am = &ethernet_arp_main;
645   ip4_address_t *address = address_arg;
646   uword *p;
647   pending_resolution_t *pr;
648
649   pool_get (am->pending_resolutions, pr);
650
651   pr->next_index = ~0;
652   pr->node_index = node_index;
653   pr->type_opaque = type_opaque;
654   pr->data = data;
655   pr->data_callback = 0;
656
657   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
658   if (p)
659     {
660       /* Insert new resolution at the head of the list */
661       pr->next_index = p[0];
662       hash_unset (am->pending_resolutions_by_address, address->as_u32);
663     }
664
665   hash_set (am->pending_resolutions_by_address, address->as_u32,
666             pr - am->pending_resolutions);
667 }
668
669 int
670 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
671                                    void *data_callback,
672                                    u32 pid,
673                                    void *address_arg,
674                                    uword node_index,
675                                    uword type_opaque, uword data, int is_add)
676 {
677   ethernet_arp_main_t *am = &ethernet_arp_main;
678   ip4_address_t *address = address_arg;
679   uword *p;
680   pending_resolution_t *mc;
681   void (*fp) (u32, u8 *) = data_callback;
682
683   if (is_add)
684     {
685       pool_get (am->mac_changes, mc);
686
687       mc->next_index = ~0;
688       mc->node_index = node_index;
689       mc->type_opaque = type_opaque;
690       mc->data = data;
691       mc->data_callback = data_callback;
692       mc->pid = pid;
693
694       p = hash_get (am->mac_changes_by_address, address->as_u32);
695       if (p)
696         {
697           /* Insert new resolution at the head of the list */
698           mc->next_index = p[0];
699           hash_unset (am->mac_changes_by_address, address->as_u32);
700         }
701
702       hash_set (am->mac_changes_by_address, address->as_u32,
703                 mc - am->mac_changes);
704       return 0;
705     }
706   else
707     {
708       u32 index;
709       pending_resolution_t *mc_last = 0;
710
711       p = hash_get (am->mac_changes_by_address, address->as_u32);
712       if (p == 0)
713         return VNET_API_ERROR_NO_SUCH_ENTRY;
714
715       index = p[0];
716
717       while (index != (u32) ~ 0)
718         {
719           mc = pool_elt_at_index (am->mac_changes, index);
720           if (mc->node_index == node_index &&
721               mc->type_opaque == type_opaque && mc->pid == pid)
722             {
723               /* Clients may need to clean up pool entries, too */
724               if (fp)
725                 (*fp) (mc->data, 0 /* no new mac addrs */ );
726               if (index == p[0])
727                 {
728                   hash_unset (am->mac_changes_by_address, address->as_u32);
729                   if (mc->next_index != ~0)
730                     hash_set (am->mac_changes_by_address, address->as_u32,
731                               mc->next_index);
732                   pool_put (am->mac_changes, mc);
733                   return 0;
734                 }
735               else
736                 {
737                   ASSERT (mc_last);
738                   mc_last->next_index = mc->next_index;
739                   pool_put (am->mac_changes, mc);
740                   return 0;
741                 }
742             }
743           mc_last = mc;
744           index = mc->next_index;
745         }
746
747       return VNET_API_ERROR_NO_SUCH_ENTRY;
748     }
749 }
750
/*
 * Next nodes of arp-input: a packet is either dropped or turned into a
 * reply that goes back to the sender.
 */
typedef enum
{
  ARP_INPUT_NEXT_DROP,		/* discard the packet */
  ARP_INPUT_NEXT_REPLY_TX,	/* transmit the reply */
  ARP_INPUT_N_NEXT,		/* number of next nodes */
} arp_input_next_t;
758
/*
 * X-macro listing the ARP input counters as (symbol, description)
 * pairs; expand it with a local definition of _().
 */
#define foreach_ethernet_arp_error \
  _ (replies_sent, "ARP replies sent") \
  _ (l2_type_not_ethernet, "L2 type not ethernet") \
  _ (l3_type_not_ip4, "L3 type not IP4") \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned") \
  _ (replies_received, "ARP replies received") \
  _ (opcode_not_request, "ARP opcode not request") \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table")
774
775 typedef enum
776 {
777 #define _(sym,string) ETHERNET_ARP_ERROR_##sym,
778   foreach_ethernet_arp_error
779 #undef _
780     ETHERNET_ARP_N_ERROR,
781 } ethernet_arp_input_error_t;
782
783
784 static void
785 unset_random_arp_entry (void)
786 {
787   ethernet_arp_main_t *am = &ethernet_arp_main;
788   ethernet_arp_ip4_entry_t *e;
789   vnet_main_t *vnm = vnet_get_main ();
790   ethernet_arp_ip4_over_ethernet_address_t delme;
791   u32 index;
792
793   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
794   am->arp_delete_rotor = index;
795
796   /* Try again from elt 0, could happen if an intfc goes down */
797   if (index == ~0)
798     {
799       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
800       am->arp_delete_rotor = index;
801     }
802
803   /* Nothing left in the pool */
804   if (index == ~0)
805     return;
806
807   e = pool_elt_at_index (am->ip4_entry_pool, index);
808
809   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
810   delme.ip4.as_u32 = e->ip4_address.as_u32;
811
812   vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
813 }
814
815 static void
816 arp_unnumbered (vlib_buffer_t * p0,
817                 u32 pi0, ethernet_header_t * eth0, u32 sw_if_index)
818 {
819   vlib_main_t *vm = vlib_get_main ();
820   vnet_main_t *vnm = vnet_get_main ();
821   vnet_interface_main_t *vim = &vnm->interface_main;
822   vnet_sw_interface_t *si;
823   vnet_hw_interface_t *hi;
824   u32 unnum_src_sw_if_index;
825   u32 *broadcast_swifs = 0;
826   u32 *buffers = 0;
827   u32 n_alloc = 0;
828   vlib_buffer_t *b0;
829   int i;
830   u8 dst_mac_address[6];
831   i16 header_size;
832   ethernet_arp_header_t *arp0;
833
834   /* Save the dst mac address */
835   clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
836
837   /* Figure out which sw_if_index supplied the address */
838   unnum_src_sw_if_index = sw_if_index;
839
840   /* Track down all users of the unnumbered source */
841   /* *INDENT-OFF* */
842   pool_foreach (si, vim->sw_interfaces,
843   ({
844     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
845         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
846       {
847         vec_add1 (broadcast_swifs, si->sw_if_index);
848       }
849   }));
850   /* *INDENT-ON* */
851
852   ASSERT (vec_len (broadcast_swifs));
853
854   /* Allocate buffering if we need it */
855   if (vec_len (broadcast_swifs) > 1)
856     {
857       vec_validate (buffers, vec_len (broadcast_swifs) - 2);
858       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len (buffers));
859       _vec_len (buffers) = n_alloc;
860       for (i = 0; i < n_alloc; i++)
861         {
862           b0 = vlib_get_buffer (vm, buffers[i]);
863
864           /* xerox (partially built) ARP pkt */
865           clib_memcpy (b0->data, p0->data,
866                        p0->current_length + p0->current_data);
867           b0->current_data = p0->current_data;
868           b0->current_length = p0->current_length;
869           vnet_buffer (b0)->sw_if_index[VLIB_RX] =
870             vnet_buffer (p0)->sw_if_index[VLIB_RX];
871         }
872     }
873
874   vec_insert (buffers, 1, 0);
875   buffers[0] = pi0;
876
877   for (i = 0; i < vec_len (buffers); i++)
878     {
879       b0 = vlib_get_buffer (vm, buffers[i]);
880       arp0 = vlib_buffer_get_current (b0);
881
882       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
883       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
884
885       /* For decoration, most likely */
886       vnet_buffer (b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
887
888       /* Fix ARP pkt src address */
889       clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
890
891       /* Build L2 encaps for this swif */
892       header_size = sizeof (ethernet_header_t);
893       if (si->sub.eth.flags.one_tag)
894         header_size += 4;
895       else if (si->sub.eth.flags.two_tags)
896         header_size += 8;
897
898       vlib_buffer_advance (b0, -header_size);
899       eth0 = vlib_buffer_get_current (b0);
900
901       if (si->sub.eth.flags.one_tag)
902         {
903           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
904
905           eth0->type = si->sub.eth.flags.dot1ad ?
906             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
907             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
908           outer->priority_cfi_and_id =
909             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
910           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
911
912         }
913       else if (si->sub.eth.flags.two_tags)
914         {
915           ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
916           ethernet_vlan_header_t *inner = (void *) (outer + 1);
917
918           eth0->type = si->sub.eth.flags.dot1ad ?
919             clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
920             clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
921           outer->priority_cfi_and_id =
922             clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
923           outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
924           inner->priority_cfi_and_id =
925             clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
926           inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
927
928         }
929       else
930         {
931           eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
932         }
933
934       /* Restore the original dst address, set src address */
935       clib_memcpy (eth0->dst_address, dst_mac_address,
936                    sizeof (eth0->dst_address));
937       clib_memcpy (eth0->src_address, hi->hw_address,
938                    sizeof (eth0->src_address));
939
940       /* Transmit replicas */
941       if (i > 0)
942         {
943           vlib_frame_t *f =
944             vlib_get_frame_to_node (vm, hi->output_node_index);
945           u32 *to_next = vlib_frame_vector_args (f);
946           to_next[0] = buffers[i];
947           f->n_vectors = 1;
948           vlib_put_frame_to_node (vm, hi->output_node_index, f);
949         }
950     }
951
952   /* The regular path outputs the original pkt.. */
953   vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
954
955   vec_free (broadcast_swifs);
956   vec_free (buffers);
957 }
958
959 static uword
960 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
961 {
962   ethernet_arp_main_t *am = &ethernet_arp_main;
963   vnet_main_t *vnm = vnet_get_main ();
964   ip4_main_t *im4 = &ip4_main;
965   u32 n_left_from, next_index, *from, *to_next;
966   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
967
968   from = vlib_frame_vector_args (frame);
969   n_left_from = frame->n_vectors;
970   next_index = node->cached_next_index;
971
972   if (node->flags & VLIB_NODE_FLAG_TRACE)
973     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
974                                    /* stride */ 1,
975                                    sizeof (ethernet_arp_input_trace_t));
976
977   while (n_left_from > 0)
978     {
979       u32 n_left_to_next;
980
981       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
982
983       while (n_left_from > 0 && n_left_to_next > 0)
984         {
985           vlib_buffer_t *p0;
986           vnet_hw_interface_t *hw_if0;
987           ethernet_arp_header_t *arp0;
988           ethernet_header_t *eth0;
989           ip_adjacency_t *adj0;
990           ip4_address_t *if_addr0, proxy_src;
991           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
992           u8 is_request0, dst_is_local0, is_unnum0;
993           ethernet_proxy_arp_t *pa;
994           fib_node_index_t dst_fei, src_fei;
995           fib_prefix_t pfx0;
996           fib_entry_flag_t src_flags, dst_flags;
997
998           pi0 = from[0];
999           to_next[0] = pi0;
1000           from += 1;
1001           to_next += 1;
1002           n_left_from -= 1;
1003           n_left_to_next -= 1;
1004           pa = 0;
1005
1006           p0 = vlib_get_buffer (vm, pi0);
1007           arp0 = vlib_buffer_get_current (p0);
1008
1009           is_request0 = arp0->opcode
1010             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
1011
1012           error0 = ETHERNET_ARP_ERROR_replies_sent;
1013
1014           error0 =
1015             (arp0->l2_type !=
1016              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1017              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1018           error0 =
1019             (arp0->l3_type !=
1020              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1021              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1022
1023           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1024
1025           if (error0)
1026             goto drop2;
1027
1028           /* Check that IP address is local and matches incoming interface. */
1029           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1030           if (~0 == fib_index0)
1031             {
1032               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1033               goto drop2;
1034
1035             }
1036           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1037                                           &arp0->ip4_over_ethernet[1].ip4,
1038                                           32);
1039           dst_flags = fib_entry_get_flags_for_source (dst_fei,
1040                                                       FIB_SOURCE_INTERFACE);
1041
1042           conn_sw_if_index0 =
1043             fib_entry_get_resolving_interface_for_source (dst_fei,
1044                                                           FIB_SOURCE_INTERFACE);
1045
1046           if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
1047             {
1048               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1049               goto drop1;
1050             }
1051
1052           /* Honor unnumbered interface, if any */
1053           is_unnum0 = sw_if_index0 != conn_sw_if_index0;
1054
1055           /* Source must also be local to subnet of matching interface address. */
1056           src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1057                                           &arp0->ip4_over_ethernet[0].ip4,
1058                                           32);
1059           src_flags = fib_entry_get_flags (src_fei);
1060
1061           if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
1062                 (FIB_ENTRY_FLAG_CONNECTED & src_flags)) ||
1063               sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
1064             {
1065               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
1066               goto drop2;
1067             }
1068
1069           /* Reject requests/replies with our local interface address. */
1070           if (FIB_ENTRY_FLAG_LOCAL & src_flags)
1071             {
1072               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
1073               goto drop2;
1074             }
1075
1076           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
1077           fib_entry_get_prefix (dst_fei, &pfx0);
1078           if_addr0 = &pfx0.fp_addr.ip4;
1079
1080           /* Fill in ethernet header. */
1081           eth0 = ethernet_buffer_get_header (p0);
1082
1083           /* Trash ARP packets whose ARP-level source addresses do not
1084              match their L2-frame-level source addresses */
1085           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1086                       sizeof (eth0->src_address)))
1087             {
1088               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1089               goto drop2;
1090             }
1091
1092           /* Learn or update sender's mapping only for requests or unicasts
1093              that don't match local interface address. */
1094           if (ethernet_address_cast (eth0->dst_address) ==
1095               ETHERNET_ADDRESS_UNICAST || is_request0)
1096             {
1097               if (am->limit_arp_cache_size &&
1098                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
1099                 unset_random_arp_entry ();
1100
1101               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0,
1102                                               &arp0->ip4_over_ethernet[0],
1103                                               0 /* is_static */ );
1104               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
1105             }
1106
1107           /* Only send a reply for requests sent which match a local interface. */
1108           if (!(is_request0 && dst_is_local0))
1109             {
1110               error0 =
1111                 (arp0->opcode ==
1112                  clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) ?
1113                  ETHERNET_ARP_ERROR_replies_received : error0);
1114               goto drop1;
1115             }
1116
1117           /* Send a reply. */
1118         send_reply:
1119           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1120           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1121
1122           /* Send reply back through input interface */
1123           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1124           next0 = ARP_INPUT_NEXT_REPLY_TX;
1125
1126           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1127
1128           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1129
1130           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
1131                        hw_if0->hw_address, 6);
1132           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1133             if_addr0->data_u32;
1134
1135           /* Hardware must be ethernet-like. */
1136           ASSERT (vec_len (hw_if0->hw_address) == 6);
1137
1138           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1139           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
1140
1141           /* Figure out how much to rewind current data from adjacency. */
1142           /* get the adj from the destination's covering connected */
1143           if (NULL == pa)
1144             {
1145               adj0 =
1146                 adj_get (fib_entry_get_adj_for_source
1147                          (ip4_fib_table_lookup
1148                           (ip4_fib_get (fib_index0),
1149                            &arp0->ip4_over_ethernet[1].ip4, 31),
1150                           FIB_SOURCE_INTERFACE));
1151               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1152                 {
1153                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1154                   goto drop2;
1155                 }
1156               if (is_unnum0)
1157                 arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0);
1158               else
1159                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1160             }
1161           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1162                                            n_left_to_next, pi0, next0);
1163
1164           n_replies_sent += 1;
1165           continue;
1166
1167         drop1:
1168           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1169               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1170                arp0->ip4_over_ethernet[1].ip4.as_u32))
1171             {
1172               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1173               goto drop2;
1174             }
1175           /* See if proxy arp is configured for the address */
1176           if (is_request0)
1177             {
1178               vnet_sw_interface_t *si;
1179               u32 this_addr = clib_net_to_host_u32
1180                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1181               u32 fib_index0;
1182
1183               si = vnet_get_sw_interface (vnm, sw_if_index0);
1184
1185               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1186                 goto drop2;
1187
1188               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
1189                                     sw_if_index0);
1190
1191               vec_foreach (pa, am->proxy_arps)
1192               {
1193                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1194                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1195
1196                 /* an ARP request hit in the proxy-arp table? */
1197                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1198                     (fib_index0 == pa->fib_index))
1199                   {
1200                     eth0 = ethernet_buffer_get_header (p0);
1201                     proxy_src.as_u32 =
1202                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1203
1204                     /*
1205                      * Rewind buffer, direct code above not to
1206                      * think too hard about it.
1207                      */
1208                     if_addr0 = &proxy_src;
1209                     is_unnum0 = 0;
1210                     i32 ethernet_start =
1211                       vnet_buffer (p0)->ethernet.start_of_ethernet_header;
1212                     i32 rewind = p0->current_data - ethernet_start;
1213                     vlib_buffer_advance (p0, -rewind);
1214                     n_proxy_arp_replies_sent++;
1215                     goto send_reply;
1216                   }
1217               }
1218             }
1219
1220         drop2:
1221
1222           next0 = ARP_INPUT_NEXT_DROP;
1223           p0->error = node->errors[error0];
1224
1225           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1226                                            n_left_to_next, pi0, next0);
1227         }
1228
1229       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1230     }
1231
1232   vlib_error_count (vm, node->node_index,
1233                     ETHERNET_ARP_ERROR_replies_sent,
1234                     n_replies_sent - n_proxy_arp_replies_sent);
1235
1236   vlib_error_count (vm, node->node_index,
1237                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
1238                     n_proxy_arp_replies_sent);
1239   return frame->n_vectors;
1240 }
1241
1242 static char *ethernet_arp_error_strings[] = {
1243 #define _(sym,string) string,
1244   foreach_ethernet_arp_error
1245 #undef _
1246 };
1247
1248 /* *INDENT-OFF* */
1249 VLIB_REGISTER_NODE (arp_input_node, static) =
1250 {
1251   .function = arp_input,
1252   .name = "arp-input",
1253   .vector_size = sizeof (u32),
1254   .n_errors = ETHERNET_ARP_N_ERROR,
1255   .error_strings = ethernet_arp_error_strings,
1256   .n_next_nodes = ARP_INPUT_N_NEXT,
1257   .next_nodes = {
1258     [ARP_INPUT_NEXT_DROP] = "error-drop",
1259     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1260   },
1261   .format_buffer = format_ethernet_arp_header,
1262   .format_trace = format_ethernet_arp_input_trace,
1263 };
1264 /* *INDENT-ON* */
1265
1266 static int
1267 ip4_arp_entry_sort (void *a1, void *a2)
1268 {
1269   ethernet_arp_ip4_entry_t *e1 = a1;
1270   ethernet_arp_ip4_entry_t *e2 = a2;
1271
1272   int cmp;
1273   vnet_main_t *vnm = vnet_get_main ();
1274
1275   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1276   if (!cmp)
1277     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1278   return cmp;
1279 }
1280
1281 static clib_error_t *
1282 show_ip4_arp (vlib_main_t * vm,
1283               unformat_input_t * input, vlib_cli_command_t * cmd)
1284 {
1285   vnet_main_t *vnm = vnet_get_main ();
1286   ethernet_arp_main_t *am = &ethernet_arp_main;
1287   ethernet_arp_ip4_entry_t *e, *es;
1288   ethernet_proxy_arp_t *pa;
1289   clib_error_t *error = 0;
1290   u32 sw_if_index;
1291
1292   /* Filter entries by interface if given. */
1293   sw_if_index = ~0;
1294   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1295
1296   es = 0;
1297   /* *INDENT-OFF* */
1298   pool_foreach (e, am->ip4_entry_pool,
1299   ({
1300     vec_add1 (es, e[0]);
1301   }));
1302   /* *INDENT-ON* */
1303
1304   if (es)
1305     {
1306       vec_sort_with_function (es, ip4_arp_entry_sort);
1307       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1308       vec_foreach (e, es)
1309       {
1310         if (sw_if_index != ~0 && e->sw_if_index != sw_if_index)
1311           continue;
1312         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1313       }
1314       vec_free (es);
1315     }
1316
1317   if (vec_len (am->proxy_arps))
1318     {
1319       vlib_cli_output (vm, "Proxy arps enabled for:");
1320       vec_foreach (pa, am->proxy_arps)
1321       {
1322         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1323                          pa->fib_index,
1324                          format_ip4_address, &pa->lo_addr,
1325                          format_ip4_address, &pa->hi_addr);
1326       }
1327     }
1328
1329   return error;
1330 }
1331
1332 /*?
1333  * Display all the IPv4 ARP entries.
1334  *
1335  * @cliexpar
1336  * Example of how to display the IPv4 ARP table:
1337  * @cliexstart{show ip arp}
1338  *    Time      FIB        IP4       Flags      Ethernet              Interface
1339  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1340  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1341  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1342  * Proxy arps enabled for:
1343  * Fib_index 0   6.0.0.1 - 6.0.0.11
1344  * @cliexend
1345  ?*/
1346 /* *INDENT-OFF* */
1347 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1348   .path = "show ip arp",
1349   .function = show_ip4_arp,
1350   .short_help = "show ip arp",
1351 };
1352 /* *INDENT-ON* */
1353
1354 typedef struct
1355 {
1356   pg_edit_t l2_type, l3_type;
1357   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1358   pg_edit_t opcode;
1359   struct
1360   {
1361     pg_edit_t ethernet;
1362     pg_edit_t ip4;
1363   } ip4_over_ethernet[2];
1364 } pg_ethernet_arp_header_t;
1365
1366 static inline void
1367 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1368 {
1369   /* Initialize fields that are not bit fields in the IP header. */
1370 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1371   _(l2_type);
1372   _(l3_type);
1373   _(n_l2_address_bytes);
1374   _(n_l3_address_bytes);
1375   _(opcode);
1376   _(ip4_over_ethernet[0].ethernet);
1377   _(ip4_over_ethernet[0].ip4);
1378   _(ip4_over_ethernet[1].ethernet);
1379   _(ip4_over_ethernet[1].ip4);
1380 #undef _
1381 }
1382
1383 uword
1384 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1385 {
1386   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1387   pg_ethernet_arp_header_t *p;
1388   u32 group_index;
1389
1390   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1391                             &group_index);
1392   pg_ethernet_arp_header_init (p);
1393
1394   /* Defaults. */
1395   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1396   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1397   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1398   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1399
1400   if (!unformat (input, "%U: %U/%U -> %U/%U",
1401                  unformat_pg_edit,
1402                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1403                  unformat_pg_edit,
1404                  unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1405                  unformat_pg_edit,
1406                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1407                  unformat_pg_edit,
1408                  unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1409                  unformat_pg_edit,
1410                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1411     {
1412       /* Free up any edits we may have added. */
1413       pg_free_edit_group (s);
1414       return 0;
1415     }
1416   return 1;
1417 }
1418
1419 clib_error_t *
1420 ip4_set_arp_limit (u32 arp_limit)
1421 {
1422   ethernet_arp_main_t *am = &ethernet_arp_main;
1423
1424   am->limit_arp_cache_size = arp_limit;
1425   return 0;
1426 }
1427
1428 /**
1429  * @brief Control Plane hook to remove an ARP entry
1430  */
1431 int
1432 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1433                                   u32 sw_if_index, void *a_arg)
1434 {
1435   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1436   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1437
1438   args.sw_if_index = sw_if_index;
1439   args.flags = ETHERNET_ARP_ARGS_REMOVE;
1440   clib_memcpy (&args.a, a, sizeof (*a));
1441
1442   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1443                                (u8 *) & args, sizeof (args));
1444   return 0;
1445 }
1446
1447 /**
1448  * @brief Internally generated event to flush the ARP cache on an
1449  * interface state change event.
1450  * A flush will remove dynamic ARP entries, and for statics remove the MAC
1451  * address from the corresponding adjacencies.
1452  */
1453 static int
1454 vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
1455                                   u32 sw_if_index, void *a_arg)
1456 {
1457   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1458   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1459
1460   args.sw_if_index = sw_if_index;
1461   args.flags = ETHERNET_ARP_ARGS_FLUSH;
1462   clib_memcpy (&args.a, a, sizeof (*a));
1463
1464   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1465                                (u8 *) & args, sizeof (args));
1466   return 0;
1467 }
1468
1469 /**
1470  * @brief Internally generated event to populate the ARP cache on an
1471  * interface state change event.
1472  * For static entries this will re-source the adjacencies.
1473  *
1474  * @param sw_if_index The interface on which the ARP entires are acted
1475  */
1476 static int
1477 vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
1478                                      u32 sw_if_index, void *a_arg)
1479 {
1480   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1481   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1482
1483   args.sw_if_index = sw_if_index;
1484   args.flags = ETHERNET_ARP_ARGS_POPULATE;
1485   clib_memcpy (&args.a, a, sizeof (*a));
1486
1487   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1488                                (u8 *) & args, sizeof (args));
1489   return 0;
1490 }
1491
1492 /*
1493  * arp_add_del_interface_address
1494  *
1495  * callback when an interface address is added or deleted
1496  */
1497 static void
1498 arp_add_del_interface_address (ip4_main_t * im,
1499                                uword opaque,
1500                                u32 sw_if_index,
1501                                ip4_address_t * address,
1502                                u32 address_length,
1503                                u32 if_address_index, u32 is_del)
1504 {
1505   /*
1506    * Flush the ARP cache of all entries covered by the address
1507    * that is being removed.
1508    */
1509   ethernet_arp_main_t *am = &ethernet_arp_main;
1510   ethernet_arp_ip4_entry_t *e;
1511
1512   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
1513     return;
1514
1515   if (is_del)
1516     {
1517       ethernet_arp_interface_t *eai;
1518       u32 i, *to_delete = 0;
1519       hash_pair_t *pair;
1520
1521       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
1522
1523       /* *INDENT-OFF* */
1524       hash_foreach_pair (pair, eai->arp_entries,
1525       ({
1526         e = pool_elt_at_index(am->ip4_entry_pool,
1527                               pair->value[0]);
1528         if (ip4_destination_matches_route (im, &e->ip4_address,
1529                                            address, address_length))
1530           {
1531             vec_add1 (to_delete, e - am->ip4_entry_pool);
1532           }
1533       }));
1534       /* *INDENT-ON* */
1535
1536       for (i = 0; i < vec_len (to_delete); i++)
1537         {
1538           ethernet_arp_ip4_over_ethernet_address_t delme;
1539           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1540
1541           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1542           delme.ip4.as_u32 = e->ip4_address.as_u32;
1543
1544           vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
1545                                             e->sw_if_index, &delme);
1546         }
1547
1548       vec_free (to_delete);
1549     }
1550 }
1551
1552 static clib_error_t *
1553 ethernet_arp_init (vlib_main_t * vm)
1554 {
1555   ethernet_arp_main_t *am = &ethernet_arp_main;
1556   ip4_main_t *im = &ip4_main;
1557   clib_error_t *error;
1558   pg_node_t *pn;
1559
1560   if ((error = vlib_call_init_function (vm, ethernet_init)))
1561     return error;
1562
1563   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1564
1565   pn = pg_get_node (arp_input_node.index);
1566   pn->unformat_edit = unformat_pg_arp_header;
1567
1568   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1569 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1570   foreach_ethernet_arp_opcode;
1571 #undef _
1572
1573   /* $$$ configurable */
1574   am->limit_arp_cache_size = 50000;
1575
1576   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1577   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1578
1579   /* don't trace ARP error packets */
1580   {
1581     vlib_node_runtime_t *rt =
1582       vlib_node_get_runtime (vm, arp_input_node.index);
1583
1584 #define _(a,b)                                  \
1585     vnet_pcap_drop_trace_filter_add_del         \
1586         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1587          1 /* is_add */);
1588     foreach_ethernet_arp_error
1589 #undef _
1590   }
1591
1592   ip4_add_del_interface_address_callback_t cb;
1593   cb.function = arp_add_del_interface_address;
1594   cb.function_opaque = 0;
1595   vec_add1 (im->add_del_interface_address_callbacks, cb);
1596
1597   return 0;
1598 }
1599
1600 VLIB_INIT_FUNCTION (ethernet_arp_init);
1601
1602 static void
1603 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
1604 {
1605   ethernet_arp_main_t *am = &ethernet_arp_main;
1606
1607   fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
1608   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
1609   pool_put (am->ip4_entry_pool, e);
1610 }
1611
1612 static inline int
1613 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1614                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1615                                            * args)
1616 {
1617   ethernet_arp_main_t *am = &ethernet_arp_main;
1618   ethernet_arp_ip4_entry_t *e;
1619   ethernet_arp_interface_t *eai;
1620
1621   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1622
1623   e = arp_entry_find (eai, &args->a.ip4);
1624
1625   if (NULL != e)
1626     {
1627       adj_nbr_walk_nh4 (e->sw_if_index,
1628                         &e->ip4_address, arp_mk_incomplete_walk, e);
1629       arp_entry_free (eai, e);
1630     }
1631
1632   return 0;
1633 }
1634
1635 static int
1636 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1637                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1638                                            * args)
1639 {
1640   ethernet_arp_main_t *am = &ethernet_arp_main;
1641   ethernet_arp_ip4_entry_t *e;
1642   ethernet_arp_interface_t *eai;
1643
1644   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1645
1646   e = arp_entry_find (eai, &args->a.ip4);
1647
1648   if (NULL != e)
1649     {
1650       adj_nbr_walk_nh4 (e->sw_if_index,
1651                         &e->ip4_address, arp_mk_incomplete_walk, e);
1652
1653       /*
1654        * The difference between flush and unset, is that an unset
1655        * means delete for static and dynamic entries. A flush
1656        * means delete only for dynamic. Flushing is what the DP
1657        * does in response to interface events. unset is only done
1658        * by the control plane.
1659        */
1660       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
1661         {
1662           arp_entry_free (eai, e);
1663         }
1664     }
1665   return (0);
1666 }
1667
1668 static int
1669 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
1670                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
1671                                               * args)
1672 {
1673   ethernet_arp_main_t *am = &ethernet_arp_main;
1674   ethernet_arp_ip4_entry_t *e;
1675   ethernet_arp_interface_t *eai;
1676
1677   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1678
1679   e = arp_entry_find (eai, &args->a.ip4);
1680
1681   if (NULL != e)
1682     {
1683       adj_nbr_walk_nh4 (e->sw_if_index,
1684                         &e->ip4_address, arp_mk_complete_walk, e);
1685     }
1686   return (0);
1687 }
1688
1689 static void
1690 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
1691                                     * a)
1692 {
1693   vnet_main_t *vm = vnet_get_main ();
1694   ASSERT (os_get_cpu_number () == 0);
1695
1696   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
1697     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
1698   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
1699     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
1700   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
1701     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
1702   else
1703     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
1704 }
1705
1706 /**
1707  * @brief Invoked when the interface's admin state changes
1708  */
1709 static clib_error_t *
1710 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
1711                                    u32 sw_if_index, u32 flags)
1712 {
1713   ethernet_arp_main_t *am = &ethernet_arp_main;
1714   ethernet_arp_ip4_entry_t *e;
1715   u32 i, *to_delete = 0;
1716
1717   /* *INDENT-OFF* */
1718   pool_foreach (e, am->ip4_entry_pool,
1719   ({
1720     if (e->sw_if_index == sw_if_index)
1721       vec_add1 (to_delete,
1722                 e - am->ip4_entry_pool);
1723   }));
1724   /* *INDENT-ON* */
1725
1726   for (i = 0; i < vec_len (to_delete); i++)
1727     {
1728       ethernet_arp_ip4_over_ethernet_address_t delme;
1729       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1730
1731       clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1732       delme.ip4.as_u32 = e->ip4_address.as_u32;
1733
1734       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1735         {
1736           vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1737         }
1738       else
1739         {
1740           vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1741         }
1742
1743     }
1744   vec_free (to_delete);
1745
1746   return 0;
1747 }
1748
1749 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
1750
1751 static void
1752 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
1753 {
1754   u8 old;
1755   int i;
1756
1757   for (i = 3; i >= 0; i--)
1758     {
1759       old = a->ip4.as_u8[i];
1760       a->ip4.as_u8[i] += 1;
1761       if (old < a->ip4.as_u8[i])
1762         break;
1763     }
1764
1765   for (i = 5; i >= 0; i--)
1766     {
1767       old = a->ethernet[i];
1768       a->ethernet[i] += 1;
1769       if (old < a->ethernet[i])
1770         break;
1771     }
1772 }
1773
1774 int
1775 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
1776                                 u32 sw_if_index, void *a_arg, int is_static)
1777 {
1778   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1779   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1780
1781   args.sw_if_index = sw_if_index;
1782   args.is_static = is_static;
1783   args.flags = 0;
1784   clib_memcpy (&args.a, a, sizeof (*a));
1785
1786   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1787                                (u8 *) & args, sizeof (args));
1788   return 0;
1789 }
1790
1791 int
1792 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
1793                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
1794 {
1795   ethernet_arp_main_t *am = &ethernet_arp_main;
1796   ethernet_proxy_arp_t *pa;
1797   u32 found_at_index = ~0;
1798
1799   vec_foreach (pa, am->proxy_arps)
1800   {
1801     if (pa->lo_addr == lo_addr->as_u32
1802         && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index)
1803       {
1804         found_at_index = pa - am->proxy_arps;
1805         break;
1806       }
1807   }
1808
1809   if (found_at_index != ~0)
1810     {
1811       /* Delete, otherwise it's already in the table */
1812       if (is_del)
1813         vec_delete (am->proxy_arps, 1, found_at_index);
1814       return 0;
1815     }
1816   /* delete, no such entry */
1817   if (is_del)
1818     return VNET_API_ERROR_NO_SUCH_ENTRY;
1819
1820   /* add, not in table */
1821   vec_add2 (am->proxy_arps, pa, 1);
1822   pa->lo_addr = lo_addr->as_u32;
1823   pa->hi_addr = hi_addr->as_u32;
1824   pa->fib_index = fib_index;
1825   return 0;
1826 }
1827
1828 /*
1829  * Remove any proxy arp entries asdociated with the
1830  * specificed fib.
1831  */
1832 int
1833 vnet_proxy_arp_fib_reset (u32 fib_id)
1834 {
1835   ip4_main_t *im = &ip4_main;
1836   ethernet_arp_main_t *am = &ethernet_arp_main;
1837   ethernet_proxy_arp_t *pa;
1838   u32 *entries_to_delete = 0;
1839   u32 fib_index;
1840   uword *p;
1841   int i;
1842
1843   p = hash_get (im->fib_index_by_table_id, fib_id);
1844   if (!p)
1845     return VNET_API_ERROR_NO_SUCH_ENTRY;
1846   fib_index = p[0];
1847
1848   vec_foreach (pa, am->proxy_arps)
1849   {
1850     if (pa->fib_index == fib_index)
1851       {
1852         vec_add1 (entries_to_delete, pa - am->proxy_arps);
1853       }
1854   }
1855
1856   for (i = 0; i < vec_len (entries_to_delete); i++)
1857     {
1858       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1859     }
1860
1861   vec_free (entries_to_delete);
1862
1863   return 0;
1864 }
1865
1866 static clib_error_t *
1867 ip_arp_add_del_command_fn (vlib_main_t * vm,
1868                            unformat_input_t * input, vlib_cli_command_t * cmd)
1869 {
1870   vnet_main_t *vnm = vnet_get_main ();
1871   u32 sw_if_index;
1872   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1873   int addr_valid = 0;
1874   int is_del = 0;
1875   int count = 1;
1876   u32 fib_index = 0;
1877   u32 fib_id;
1878   int is_static = 0;
1879   int is_proxy = 0;
1880
1881   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1882     {
1883       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1884       if (unformat (input, "%U %U %U",
1885                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1886                     unformat_ip4_address, &addr.ip4,
1887                     unformat_ethernet_address, &addr.ethernet))
1888         addr_valid = 1;
1889
1890       else if (unformat (input, "delete") || unformat (input, "del"))
1891         is_del = 1;
1892
1893       else if (unformat (input, "static"))
1894         is_static = 1;
1895
1896       else if (unformat (input, "count %d", &count))
1897         ;
1898
1899       else if (unformat (input, "fib-id %d", &fib_id))
1900         {
1901           ip4_main_t *im = &ip4_main;
1902           uword *p = hash_get (im->fib_index_by_table_id, fib_id);
1903           if (!p)
1904             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
1905           fib_index = p[0];
1906         }
1907
1908       else if (unformat (input, "proxy %U - %U",
1909                          unformat_ip4_address, &lo_addr.ip4,
1910                          unformat_ip4_address, &hi_addr.ip4))
1911         is_proxy = 1;
1912       else
1913         break;
1914     }
1915
1916   if (is_proxy)
1917     {
1918       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
1919                                      fib_index, is_del);
1920       return 0;
1921     }
1922
1923   if (addr_valid)
1924     {
1925       int i;
1926
1927       for (i = 0; i < count; i++)
1928         {
1929           if (is_del == 0)
1930             {
1931               uword event_type, *event_data = 0;
1932
1933               /* Park the debug CLI until the arp entry is installed */
1934               vnet_register_ip4_arp_resolution_event
1935                 (vnm, &addr.ip4, vlib_current_process (vm),
1936                  1 /* type */ , 0 /* data */ );
1937
1938               vnet_arp_set_ip4_over_ethernet
1939                 (vnm, sw_if_index, &addr, is_static);
1940
1941               vlib_process_wait_for_event (vm);
1942               event_type = vlib_process_get_events (vm, &event_data);
1943               vec_reset_length (event_data);
1944               if (event_type != 1)
1945                 clib_warning ("event type %d unexpected", event_type);
1946             }
1947           else
1948             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
1949
1950           increment_ip4_and_mac_address (&addr);
1951         }
1952     }
1953   else
1954     {
1955       return clib_error_return (0, "unknown input `%U'",
1956                                 format_unformat_error, input);
1957     }
1958
1959   return 0;
1960 }
1961
1962 /* *INDENT-OFF* */
1963 /*?
1964  * Add or delete IPv4 ARP cache entries.
1965  *
1966  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
1967  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
1968  * any order and combination.
1969  *
1970  * @cliexpar
1971  * @parblock
1972  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
1973  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
1974  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1975  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
1976  *
1977  * To add or delete an IPv4 ARP cache entry to or from a specific fib
1978  * table:
1979  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1980  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1981  *
1982  * Add or delete IPv4 static ARP cache entries as follows:
1983  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1984  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1985  *
1986  * For testing / debugging purposes, the 'set ip arp' command can add or
1987  * delete multiple entries. Supply the 'count N' parameter:
1988  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1989  * @endparblock
1990  ?*/
1991 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1992   .path = "set ip arp",
1993   .short_help =
1994   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1995   .function = ip_arp_add_del_command_fn,
1996 };
1997 /* *INDENT-ON* */
1998
1999 static clib_error_t *
2000 set_int_proxy_arp_command_fn (vlib_main_t * vm,
2001                               unformat_input_t *
2002                               input, vlib_cli_command_t * cmd)
2003 {
2004   vnet_main_t *vnm = vnet_get_main ();
2005   u32 sw_if_index;
2006   vnet_sw_interface_t *si;
2007   int enable = 0;
2008   int intfc_set = 0;
2009
2010   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2011     {
2012       if (unformat (input, "%U", unformat_vnet_sw_interface,
2013                     vnm, &sw_if_index))
2014         intfc_set = 1;
2015       else if (unformat (input, "enable") || unformat (input, "on"))
2016         enable = 1;
2017       else if (unformat (input, "disable") || unformat (input, "off"))
2018         enable = 0;
2019       else
2020         break;
2021     }
2022
2023   if (intfc_set == 0)
2024     return clib_error_return (0, "unknown input '%U'",
2025                               format_unformat_error, input);
2026
2027   si = vnet_get_sw_interface (vnm, sw_if_index);
2028   ASSERT (si);
2029   if (enable)
2030     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2031   else
2032     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2033
2034   return 0;
2035 }
2036
2037 /* *INDENT-OFF* */
2038 /*?
2039  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2040  * requests for the indicated address range. Multiple proxy-arp
2041  * ranges may be provisioned.
2042  *
2043  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2044  * Also, the underlying implementation has not been performance-tuned.
2045  * Avoid creating an unnecessarily large set of ranges.
2046  *
2047  * @cliexpar
2048  * To enable proxy arp on a range of addresses, use:
2049  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2050  * Append 'del' to delete a range of proxy ARP addresses:
2051  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2052  * You must then specifically enable proxy arp on individual interfaces:
2053  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2054  * To disable proxy arp on an individual interface:
2055  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2056  ?*/
2057 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2058   .path = "set interface proxy-arp",
2059   .short_help =
2060   "set interface proxy-arp <intfc> [enable|disable]",
2061   .function = set_int_proxy_arp_command_fn,
2062 };
2063 /* *INDENT-ON* */
2064
2065
2066 /*
2067  * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
2068  * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
2069  */
/** Next-node slots of the arp-term-l2bd graph node. */
typedef enum
{
  ARP_TERM_NEXT_L2_OUTPUT = 0,  /* hand the buffer to the L2 output slot */
  ARP_TERM_NEXT_DROP,           /* hand the buffer to the drop slot */
  ARP_TERM_N_NEXT,              /* number of slots above; keep last */
} arp_term_next_t;
2076
2077 u32 arp_term_next_node_index[32];
2078
2079 static uword
2080 arp_term_l2bd (vlib_main_t * vm,
2081                vlib_node_runtime_t * node, vlib_frame_t * frame)
2082 {
2083   l2input_main_t *l2im = &l2input_main;
2084   u32 n_left_from, next_index, *from, *to_next;
2085   u32 n_replies_sent = 0;
2086   u16 last_bd_index = ~0;
2087   l2_bridge_domain_t *last_bd_config = 0;
2088   l2_input_config_t *cfg0;
2089
2090   from = vlib_frame_vector_args (frame);
2091   n_left_from = frame->n_vectors;
2092   next_index = node->cached_next_index;
2093
2094   while (n_left_from > 0)
2095     {
2096       u32 n_left_to_next;
2097
2098       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2099
2100       while (n_left_from > 0 && n_left_to_next > 0)
2101         {
2102           vlib_buffer_t *p0;
2103           ethernet_header_t *eth0;
2104           ethernet_arp_header_t *arp0;
2105           ip6_header_t *iph0;
2106           u8 *l3h0;
2107           u32 pi0, error0, next0, sw_if_index0;
2108           u16 ethertype0;
2109           u16 bd_index0;
2110           u32 ip0;
2111           u8 *macp0;
2112
2113           pi0 = from[0];
2114           to_next[0] = pi0;
2115           from += 1;
2116           to_next += 1;
2117           n_left_from -= 1;
2118           n_left_to_next -= 1;
2119
2120           p0 = vlib_get_buffer (vm, pi0);
2121           eth0 = vlib_buffer_get_current (p0);
2122           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2123           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2124           arp0 = (ethernet_arp_header_t *) l3h0;
2125
2126           if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
2127                              (arp0->opcode !=
2128                               clib_host_to_net_u16
2129                               (ETHERNET_ARP_OPCODE_request))))
2130             goto check_ip6_nd;
2131
2132           /* Must be ARP request packet here */
2133           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2134                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2135             {
2136               u8 *t0 = vlib_add_trace (vm, node, p0,
2137                                        sizeof (ethernet_arp_input_trace_t));
2138               clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
2139             }
2140
2141           error0 = ETHERNET_ARP_ERROR_replies_sent;
2142           error0 =
2143             (arp0->l2_type !=
2144              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2145              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2146           error0 =
2147             (arp0->l3_type !=
2148              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2149              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2150
2151           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2152
2153           if (error0)
2154             goto drop;
2155
2156           /* Trash ARP packets whose ARP-level source addresses do not
2157              match their L2-frame-level source addresses  */
2158           if (PREDICT_FALSE
2159               (memcmp
2160                (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
2161                 sizeof (eth0->src_address))))
2162             {
2163               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2164               goto drop;
2165             }
2166
2167           /* Check if anyone want ARP request events for L2 BDs */
2168           {
2169             pending_resolution_t *mc;
2170             ethernet_arp_main_t *am = &ethernet_arp_main;
2171             uword *p = hash_get (am->mac_changes_by_address, 0);
2172             if (p && (vnet_buffer (p0)->l2.shg == 0))
2173               {                 // Only SHG 0 interface which is more likely local
2174                 u32 next_index = p[0];
2175                 while (next_index != (u32) ~ 0)
2176                   {
2177                     int (*fp) (u32, u8 *, u32, u32);
2178                     int rv = 1;
2179                     mc = pool_elt_at_index (am->mac_changes, next_index);
2180                     fp = mc->data_callback;
2181                     /* Call the callback, return 1 to suppress dup events */
2182                     if (fp)
2183                       rv = (*fp) (mc->data,
2184                                   arp0->ip4_over_ethernet[0].ethernet,
2185                                   sw_if_index0,
2186                                   arp0->ip4_over_ethernet[0].ip4.as_u32);
2187                     /* Signal the resolver process */
2188                     if (rv == 0)
2189                       vlib_process_signal_event (vm, mc->node_index,
2190                                                  mc->type_opaque, mc->data);
2191                     next_index = mc->next_index;
2192                   }
2193               }
2194           }
2195
2196           /* lookup BD mac_by_ip4 hash table for MAC entry */
2197           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2198           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2199           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2200                              || (last_bd_index == (u16) ~ 0)))
2201             {
2202               last_bd_index = bd_index0;
2203               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2204             }
2205           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2206
2207           if (PREDICT_FALSE (!macp0))
2208             goto next_l2_feature;       /* MAC not found */
2209
2210           /* MAC found, send ARP reply -
2211              Convert ARP request packet to ARP reply */
2212           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2213           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2214           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2215           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
2216           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
2217           clib_memcpy (eth0->src_address, macp0, 6);
2218           n_replies_sent += 1;
2219
2220         output_response:
2221           /* For BVI, need to use l2-fwd node to send ARP reply as
2222              l2-output node cannot output packet to BVI properly */
2223           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2224           if (PREDICT_FALSE (cfg0->bvi))
2225             {
2226               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2227               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2228               goto next_l2_feature;
2229             }
2230
2231           /* Send ARP/ND reply back out input interface through l2-output */
2232           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2233           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2234           /* Note that output to VXLAN tunnel will fail due to SHG which
2235              is probably desireable since ARP termination is not intended
2236              for ARP requests from other hosts. If output to VXLAN tunnel is
2237              required, however, can just clear the SHG in packet as follows:
2238              vnet_buffer(p0)->l2.shg = 0;         */
2239           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2240                                            to_next, n_left_to_next, pi0,
2241                                            next0);
2242           continue;
2243
2244         check_ip6_nd:
2245           /* IP6 ND event notification or solicitation handling to generate
2246              local response instead of flooding */
2247           iph0 = (ip6_header_t *) l3h0;
2248           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2249                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2250                              !ip6_address_is_unspecified
2251                              (&iph0->src_address)))
2252             {
2253               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2254               if (vnet_ip6_nd_term
2255                   (vm, node, p0, eth0, iph0, sw_if_index0,
2256                    vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
2257                 goto output_response;
2258             }
2259
2260         next_l2_feature:
2261           {
2262             u32 feature_bitmap0 =
2263               vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
2264             vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
2265             next0 =
2266               feat_bitmap_get_next_node_index (arp_term_next_node_index,
2267                                                feature_bitmap0);
2268             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2269                                              to_next, n_left_to_next,
2270                                              pi0, next0);
2271             continue;
2272           }
2273
2274         drop:
2275           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2276               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2277                arp0->ip4_over_ethernet[1].ip4.as_u32))
2278             {
2279               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2280             }
2281           next0 = ARP_TERM_NEXT_DROP;
2282           p0->error = node->errors[error0];
2283
2284           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2285                                            to_next, n_left_to_next, pi0,
2286                                            next0);
2287         }
2288
2289       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2290     }
2291
2292   vlib_error_count (vm, node->node_index,
2293                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2294   return frame->n_vectors;
2295 }
2296
2297 /* *INDENT-OFF* */
2298 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2299   .function = arp_term_l2bd,
2300   .name = "arp-term-l2bd",
2301   .vector_size = sizeof (u32),
2302   .n_errors = ETHERNET_ARP_N_ERROR,
2303   .error_strings = ethernet_arp_error_strings,
2304   .n_next_nodes = ARP_TERM_N_NEXT,
2305   .next_nodes = {
2306     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2307     [ARP_TERM_NEXT_DROP] = "error-drop",
2308   },
2309   .format_buffer = format_ethernet_arp_header,
2310   .format_trace = format_arp_term_input_trace,
2311 };
2312 /* *INDENT-ON* */
2313
2314 clib_error_t *
2315 arp_term_init (vlib_main_t * vm)
2316 {
2317   // Initialize the feature next-node indexes
2318   feat_bitmap_init_next_nodes (vm,
2319                                arp_term_l2bd_node.index,
2320                                L2INPUT_N_FEAT,
2321                                l2input_get_feat_names (),
2322                                arp_term_next_node_index);
2323   return 0;
2324 }
2325
2326 VLIB_INIT_FUNCTION (arp_term_init);
2327
2328 void
2329 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2330 {
2331   if (e->sw_if_index == sw_if_index)
2332     {
2333       adj_nbr_walk_nh4 (e->sw_if_index,
2334                         &e->ip4_address, arp_mk_complete_walk, e);
2335     }
2336 }
2337
2338 void
2339 ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index)
2340 {
2341   ethernet_arp_main_t *am = &ethernet_arp_main;
2342   ethernet_arp_ip4_entry_t *e;
2343
2344   /* *INDENT-OFF* */
2345   pool_foreach (e, am->ip4_entry_pool,
2346   ({
2347     change_arp_mac (sw_if_index, e);
2348   }));
2349   /* *INDENT-ON* */
2350 }
2351
2352 /*
2353  * fd.io coding-style-patch-verification: ON
2354  *
2355  * Local Variables:
2356  * eval: (c-set-style "gnu")
2357  * End:
2358  */