Fix ARP show exception when no ARP entries are found.
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ethernet/ethernet.h>
20 #include <vnet/ethernet/arp_packet.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vppinfra/mhash.h>
23
/* Defined outside this file.  Used below to hand a callback (fp) plus a
   by-value argument blob (data, data_length bytes) to the RPC layer;
   presumably the callback then runs on the main thread -- the callback
   in this file asserts it is on cpu 0. */
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
25
/* Lookup key of an IPv4 ARP cache entry.  The whole struct is handed to
   mhash (ip4_entry_by_key) as the key. */
typedef struct {
  /* Software interface the entry was learned/configured on. */
  u32 sw_if_index;
  /* FIB index the entry belongs to. */
  u32 fib_index;
  /* IPv4 address being mapped to an ethernet address. */
  ip4_address_t ip4_address;
} ethernet_arp_ip4_key_t;
31
/* One IPv4 ARP cache entry, stored in ethernet_arp_main_t.ip4_entry_pool. */
typedef struct {
  /* (interface, fib, address) this entry is filed under. */
  ethernet_arp_ip4_key_t key;
  /* Resolved ethernet (MAC) address. */
  u8 ethernet_address[6];

  /* Bitmask of ETHERNET_ARP_IP4_ENTRY_FLAG_* values. */
  u16 flags;
  /* Static entries are never overwritten by a later set. */
#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)

  /* clib_cpu_time_now() value taken when the entry was last written. */
  u64 cpu_time_last_updated;
} ethernet_arp_ip4_entry_t;
41
/* One proxy-ARP range; elements of ethernet_arp_main_t.proxy_arps. */
typedef struct {
  /* Low end of the address range (NOTE(review): byte order not visible
     in this chunk -- confirm against the code that fills it in). */
  u32 lo_addr;
  /* High end of the address range (same caveat as lo_addr). */
  u32 hi_addr;
  /* FIB index the range applies to. */
  u32 fib_index;
} ethernet_proxy_arp_t;
47
/* A client waiting on an IPv4 address: either a one-shot "tell me when
   this resolves" request or a standing MAC-change subscription.
   Entries for the same address form a singly linked list through
   next_index. */
typedef struct {
  /* Pool index of the next waiter for the same address, ~0 at the tail. */
  u32 next_index;
  /* Process node to signal with vlib_process_signal_event(). */
  uword node_index;
  /* Event type passed to vlib_process_signal_event(). */
  uword type_opaque;
  /* Event data passed to vlib_process_signal_event(). */
  uword data;
  /* Used for arp event notification only */
  void * data_callback;
  /* Client identifier; compared when a change subscription is removed. */
  u32 pid;
} pending_resolution_t;
57
/* All state owned by the IPv4 ARP code in this file. */
typedef struct {
  /* Hash tables mapping name to opcode. */
  uword * opcode_by_name;

  /* "glean" adjacency handling: hash from IPv4 address (as_u32) to the
     pool index of the first waiter in pending_resolutions. */
  uword * pending_resolutions_by_address;
  pending_resolution_t * pending_resolutions;

  /* Mac address change notification: hash from IPv4 address (as_u32)
     to the pool index of the first subscriber in mac_changes. */
  uword * mac_changes_by_address;
  pending_resolution_t * mac_changes;

  /* Vector indexed by hw_if_index giving the arp-input next index. */
  u32 * arp_input_next_index_by_hw_if_index;

  /* Pool of ARP cache entries. */
  ethernet_arp_ip4_entry_t * ip4_entry_pool;

  /* Maps ethernet_arp_ip4_key_t to an index into ip4_entry_pool. */
  mhash_t ip4_entry_by_key;
    
  /* ARP attack mitigation */
  /* Pool index last visited when evicting an entry. */
  u32 arp_delete_rotor;
  /* When non-zero, an entry is evicted before learning once the pool
     holds at least this many entries. */
  u32 limit_arp_cache_size;

  /* Proxy arp vector */
  ethernet_proxy_arp_t * proxy_arps;
} ethernet_arp_main_t;
83
/* The single, file-local instance of the ARP state. */
static ethernet_arp_main_t ethernet_arp_main;
85
86 static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va)
87 {
88   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
89   char * t = 0;
90   switch (h)
91     {
92 #define _(n,f) case n: t = #f; break;
93       foreach_ethernet_arp_hardware_type;
94 #undef _
95
96     default:
97       return format (s, "unknown 0x%x", h);
98     }
99
100   return format (s, "%s", t);
101 }
102
103 static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va)
104 {
105   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
106   char * t = 0;
107   switch (o)
108     {
109 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
110       foreach_ethernet_arp_opcode;
111 #undef _
112
113     default:
114       return format (s, "unknown 0x%x", o);
115     }
116
117   return format (s, "%s", t);
118 }
119
120 static uword
121 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
122                                               va_list * args)
123 {
124   int * result = va_arg (*args, int *);
125   ethernet_arp_main_t * am = &ethernet_arp_main;
126   int x, i;
127
128   /* Numeric opcode. */
129   if (unformat (input, "0x%x", &x)
130       || unformat (input, "%d", &x))
131     {
132       if (x >= (1 << 16))
133         return 0;
134       *result = x;
135       return 1;
136     }
137
138   /* Named type. */
139   if (unformat_user (input, unformat_vlib_number_by_name,
140                      am->opcode_by_name, &i))
141     {
142       *result = i;
143       return 1;
144     }
145
146   return 0;
147 }
148
149 static uword
150 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
151                                              va_list * args)
152 {
153   int * result = va_arg (*args, int *);
154   if (! unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result))
155     return 0;
156
157   *result = clib_host_to_net_u16 ((u16) *result);
158   return 1;
159 }
160
161 static u8 * format_ethernet_arp_header (u8 * s, va_list * va)
162 {
163   ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *);
164   u32 max_header_bytes = va_arg (*va, u32);
165   uword indent;
166   u16 l2_type, l3_type;
167
168   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
169     return format (s, "ARP header truncated");
170
171   l2_type = clib_net_to_host_u16 (a->l2_type);
172   l3_type = clib_net_to_host_u16 (a->l3_type);
173
174   indent = format_get_indent (s);
175
176   s = format (s, "%U, type %U/%U, address size %d/%d",
177               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
178               format_ethernet_arp_hardware_type, l2_type,
179               format_ethernet_type, l3_type,
180               a->n_l2_address_bytes, a->n_l3_address_bytes);
181               
182   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
183       && l3_type == ETHERNET_TYPE_IP4)
184     {
185       s = format (s, "\n%U%U/%U -> %U/%U",
186                   format_white_space, indent,
187                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
188                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
189                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
190                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
191     }
192   else
193     {
194       uword n2 = a->n_l2_address_bytes;
195       uword n3 = a->n_l3_address_bytes;
196       s = format (s, "\n%U%U/%U -> %U/%U",
197                   format_white_space, indent,
198                   format_hex_bytes, a->data + 0*n2 + 0*n3, n2,
199                   format_hex_bytes, a->data + 1*n2 + 0*n3, n3,
200                   format_hex_bytes, a->data + 1*n2 + 1*n3, n2,
201                   format_hex_bytes, a->data + 2*n2 + 1*n3, n3);
202     }
203
204   return s;
205 }
206
207 static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
208 {
209   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
210   ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *);
211   vnet_sw_interface_t * si;
212   ip4_fib_t * fib;
213
214   if (! e)
215     return format (s, "%=12s%=6s%=16s%=4s%=20s%=24s", "Time", "FIB", "IP4", 
216                    "Static", "Ethernet", "Interface");
217
218   fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
219                                            IP4_ROUTE_FLAG_FIB_INDEX);
220   si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
221   s = format (s, "%=12U%=6u%=16U%=4s%=20U%=25U",
222               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
223               fib->table_id,
224               format_ip4_address, &e->key.ip4_address,
225               (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? "S" : "",
226               format_ethernet_address, e->ethernet_address,
227               format_vnet_sw_interface_name, vnm, si);
228
229   return s;
230 }
231
/* Per-packet trace record for arp-input: the leading bytes of the
   packet, later formatted as an ARP header. */
typedef struct {
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
235
236 static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va)
237 {
238   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
239   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
240   ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *);
241
242   s = format (s, "%U",
243               format_ethernet_arp_header,
244               t->packet_data, sizeof (t->packet_data));
245
246   return s;
247 }
248
249 clib_error_t *
250 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
251                                    u32 sw_if_index,
252                                    u32 flags)
253 {
254   ethernet_arp_main_t * am = &ethernet_arp_main;
255   ethernet_arp_ip4_entry_t * e;
256
257   if (! (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
258     {
259       u32 i, * to_delete = 0;
260
261       pool_foreach (e, am->ip4_entry_pool, ({
262         if (e->key.sw_if_index == sw_if_index)
263           vec_add1 (to_delete, e - am->ip4_entry_pool);
264       }));
265
266       for (i = 0; i < vec_len (to_delete); i++)
267         {
268           ethernet_arp_ip4_over_ethernet_address_t delme;
269           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
270
271           memcpy (&delme.ethernet, e->ethernet_address, 6);
272           delme.ip4.as_u32 = e->key.ip4_address.as_u32;
273
274           vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
275                                             e->key.fib_index, &delme);
276         }
277
278       vec_free (to_delete);
279     }
280
281   return 0;
282 }
283
284 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
285
/* Forward declarations of the workers that actually modify the ARP
   cache.  They are invoked from set_ip4_over_ethernet_rpc_callback;
   a_arg points at an ethernet_arp_ip4_over_ethernet_address_t. */
static int
vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                         u32 sw_if_index,
                                         u32 fib_index,
                                         void * a_arg,
                                         int is_static);

static int
vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                           u32 sw_if_index,
                                           u32 fib_index,
                                           void * a_arg);
298
/* Argument blob passed by value through vl_api_rpc_call_main_thread()
   to set_ip4_over_ethernet_rpc_callback(). */
typedef struct {
  /* Interface the mapping applies to. */
  u32 sw_if_index;
  /* FIB index, or ~0 to use the interface's FIB. */
  u32 fib_index;
  /* The IPv4 / ethernet address pair. */
  ethernet_arp_ip4_over_ethernet_address_t a;
  /* Non-zero to mark the new entry static (ignored on remove). */
  int is_static;
  int is_remove; /* set is_remove=1 to clear arp entry */
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
306
307 static void set_ip4_over_ethernet_rpc_callback 
308 ( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
309 {
310   vnet_main_t * vm = vnet_get_main();
311   ASSERT(os_get_cpu_number() == 0);
312
313   if (a->is_remove)
314     vnet_arp_unset_ip4_over_ethernet_internal(vm, 
315                                               a->sw_if_index, 
316                                               a->fib_index,
317                                               &(a->a));
318   else
319     vnet_arp_set_ip4_over_ethernet_internal (vm,
320                                              a->sw_if_index,
321                                              a->fib_index,
322                                              &(a->a),
323                                              a->is_static);
324 }
325
326 int
327 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
328                                 u32 sw_if_index,
329                                 u32 fib_index,
330                                 void * a_arg,
331                                 int is_static)
332 {
333   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
334   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
335
336   args.sw_if_index = sw_if_index;
337   args.fib_index = fib_index;
338   args.is_static = is_static;
339   args.is_remove = 0;
340   memcpy (&args.a, a, sizeof (*a));
341
342   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
343                                (u8 *) &args, sizeof (args));
344   return 0;
345 }
346
347 int
348 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
349                                          u32 sw_if_index,
350                                          u32 fib_index,
351                                          void * a_arg,
352                                          int is_static)
353 {
354   ethernet_arp_ip4_key_t k;
355   ethernet_arp_ip4_entry_t * e = 0;
356   ethernet_arp_main_t * am = &ethernet_arp_main;
357   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
358   vlib_main_t * vm = vlib_get_main();
359   ip4_main_t * im = &ip4_main;
360   int make_new_arp_cache_entry=1;
361   uword * p;
362   ip4_add_del_route_args_t args;
363   ip_adjacency_t adj;
364   pending_resolution_t * pr, * mc;
365   
366   u32 next_index;
367
368   fib_index = (fib_index != (u32)~0) 
369     ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
370
371   k.sw_if_index = sw_if_index;
372   k.ip4_address = a->ip4;
373   k.fib_index = fib_index;
374
375   p = mhash_get (&am->ip4_entry_by_key, &k);
376   if (p)
377     {
378       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
379
380       /* Refuse to over-write static arp. */
381       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
382         return -2;
383       make_new_arp_cache_entry = 0;
384     }
385
386   /* Note: always install the route. It might have been deleted */
387   memset(&adj, 0, sizeof(adj));
388   adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
389
390   vnet_rewrite_for_sw_interface
391     (vnm,
392      VNET_L3_PACKET_TYPE_IP4,
393      sw_if_index,
394      ip4_rewrite_node.index,
395      a->ethernet,               /* destination address */
396      &adj.rewrite_header,
397      sizeof (adj.rewrite_data));
398
399   args.table_index_or_table_id = fib_index;
400   args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
401   args.dst_address = a->ip4;
402   args.dst_address_length = 32;
403   args.adj_index = ~0;
404   args.add_adj = &adj;
405   args.n_add_adj = 1;
406
407   ip4_add_del_route (im, &args);
408   if (make_new_arp_cache_entry)
409     {
410       pool_get (am->ip4_entry_pool, e);
411       mhash_set (&am->ip4_entry_by_key, &k,
412                  e - am->ip4_entry_pool,
413                  /* old value */ 0);
414       e->key = k;
415     }
416
417   /* Update time stamp and ethernet address. */
418   memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address));
419   e->cpu_time_last_updated = clib_cpu_time_now ();
420   if (is_static)
421     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
422
423   /* Customer(s) waiting for this address to be resolved? */
424   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
425   if (p)
426     {
427       next_index = p[0];
428
429       while (next_index != (u32)~0)
430         {
431           pr = pool_elt_at_index (am->pending_resolutions, next_index);
432           vlib_process_signal_event (vm, pr->node_index,
433                                      pr->type_opaque, 
434                                      pr->data);
435           next_index = pr->next_index;
436           pool_put (am->pending_resolutions, pr);
437         }
438       
439       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
440     }
441
442   /* Customer(s) requesting ARP event for this address? */
443   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
444   if (p)
445     {
446       next_index = p[0];
447
448       while (next_index != (u32)~0)
449         {
450           int (*fp)(u32, u8 *, u32, u32);
451           int rv = 1;
452           mc = pool_elt_at_index (am->mac_changes, next_index);
453           fp = mc->data_callback;
454
455           /* Call the user's data callback, return 1 to suppress dup events */
456           if (fp)
457             rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0);
458           
459           /* 
460            * Signal the resolver process, as long as the user
461            * says they want to be notified
462            */
463           if (rv == 0)
464             vlib_process_signal_event (vm, mc->node_index,
465                                        mc->type_opaque, 
466                                        mc->data);
467           next_index = mc->next_index;
468         }
469     }
470
471   return 0;
472 }
473
474 void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, 
475                                              void * address_arg,
476                                              uword node_index,
477                                              uword type_opaque,
478                                              uword data)
479 {
480   ethernet_arp_main_t * am = &ethernet_arp_main;
481   ip4_address_t * address = address_arg;
482   uword * p;
483   pending_resolution_t * pr;
484   
485   pool_get (am->pending_resolutions, pr);
486
487   pr->next_index = ~0;
488   pr->node_index = node_index;
489   pr->type_opaque = type_opaque;
490   pr->data = data;
491   pr->data_callback = 0;
492
493   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
494   if (p)
495     {
496       /* Insert new resolution at the head of the list */
497       pr->next_index = p[0];
498       hash_unset (am->pending_resolutions_by_address, address->as_u32);
499     }
500   
501   hash_set (am->pending_resolutions_by_address, address->as_u32, 
502             pr - am->pending_resolutions);
503 }
504
505 int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, 
506                                        void * data_callback,
507                                        u32 pid,
508                                        void * address_arg,
509                                        uword node_index,
510                                        uword type_opaque,
511                                        uword data, int is_add)
512 {
513   ethernet_arp_main_t * am = &ethernet_arp_main;
514   ip4_address_t * address = address_arg;
515   uword * p;
516   pending_resolution_t * mc;
517   void (*fp)(u32, u8 *) = data_callback;
518   
519   if (is_add)
520     {
521       pool_get (am->mac_changes, mc);
522
523       mc->next_index = ~0;
524       mc->node_index = node_index;
525       mc->type_opaque = type_opaque;
526       mc->data = data;
527       mc->data_callback = data_callback;
528       mc->pid = pid;
529       
530       p = hash_get (am->mac_changes_by_address, address->as_u32);
531       if (p)
532         {
533           /* Insert new resolution at the head of the list */
534           mc->next_index = p[0];
535           hash_unset (am->mac_changes_by_address, address->as_u32);
536         }
537       
538       hash_set (am->mac_changes_by_address, address->as_u32, 
539                 mc - am->mac_changes);
540       return 0;
541     }
542   else
543     {
544       u32 index;
545       pending_resolution_t * mc_last = 0;
546
547       p = hash_get (am->mac_changes_by_address, address->as_u32);
548       if (p == 0)
549         return VNET_API_ERROR_NO_SUCH_ENTRY;
550
551       index = p[0];
552
553       while (index != (u32)~0)
554         {
555           mc = pool_elt_at_index (am->mac_changes, index);
556           if (mc->node_index == node_index &&
557               mc->type_opaque == type_opaque &&
558               mc->pid == pid)
559             {
560               /* Clients may need to clean up pool entries, too */
561               if (fp)
562                 (*fp)(mc->data, 0 /* no new mac addrs */);
563               if (index == p[0])
564                 {
565                   hash_unset (am->mac_changes_by_address, address->as_u32);
566                   if (mc->next_index != ~0)
567                     hash_set (am->mac_changes_by_address, address->as_u32,
568                               mc->next_index);
569                   pool_put (am->mac_changes, mc);
570                   return 0;
571                 }
572               else
573                 {
574                   ASSERT(mc_last);
575                   mc_last->next_index = mc->next_index;
576                   pool_put (am->mac_changes, mc);
577                   return 0;
578                 }
579             }
580           mc_last = mc;
581           index = mc->next_index;
582         }
583       
584       return VNET_API_ERROR_NO_SUCH_ENTRY;
585     }
586 }
587
/* Either we drop the packet or we send a reply to the sender.
   Only the drop disposition is a fixed next index; ARP_INPUT_N_NEXT is
   the count of fixed entries. */
typedef enum {
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
593
/* X-macro list of arp-input counters/errors: _(symbol, description).
   The first entry (replies_sent) has value 0 in the enum generated
   from this list. */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
608
/* ETHERNET_ARP_ERROR_<symbol> for every entry of
   foreach_ethernet_arp_error, in list order; ETHERNET_ARP_N_ERROR is
   the number of entries. */
typedef enum {
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
  ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
615
616 /* get first interface address */
617 ip4_address_t *
618 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
619                              ip_interface_address_t ** result_ia)
620 {
621   ip_lookup_main_t * lm = &im->lookup_main;
622   ip_interface_address_t * ia = 0;
623   ip4_address_t * result = 0;
624
625   foreach_ip_interface_address (lm, ia, sw_if_index, 
626                                 1 /* honor unnumbered */,
627   ({
628     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
629     result = a;
630     break;
631   }));
632   if (result_ia)
633     *result_ia = result ? ia : 0;
634   return result;
635 }
636
637 static void unset_random_arp_entry (void)
638 {
639   ethernet_arp_main_t * am = &ethernet_arp_main;
640   ethernet_arp_ip4_entry_t * e;
641   vnet_main_t * vnm = vnet_get_main();
642   ethernet_arp_ip4_over_ethernet_address_t delme;  
643   u32 index;
644
645   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
646   am->arp_delete_rotor = index;
647
648   /* Try again from elt 0, could happen if an intfc goes down */
649   if (index == ~0)
650     {
651       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
652       am->arp_delete_rotor = index;
653     }
654
655   /* Nothing left in the pool */
656   if (index == ~0)
657     return;
658
659   e = pool_elt_at_index (am->ip4_entry_pool, index);
660   
661   memcpy (&delme.ethernet, e->ethernet_address, 6);
662   delme.ip4.as_u32 = e->key.ip4_address.as_u32;
663   
664   vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
665                                     e->key.fib_index, &delme);
666 }
667   
668 static u32 arp_unnumbered (vlib_buffer_t * p0, 
669                            u32 pi0,
670                            ethernet_header_t * eth0,
671                            ip_interface_address_t * ifa0)
672 {
673   ethernet_arp_main_t * am = &ethernet_arp_main;
674   vlib_main_t * vm = vlib_get_main();
675   vnet_main_t * vnm = vnet_get_main();
676   vnet_interface_main_t * vim = &vnm->interface_main;
677   vnet_sw_interface_t * si;
678   vnet_hw_interface_t * hi;
679   u32 unnum_src_sw_if_index;
680   u32 * broadcast_swifs = 0;
681   u32 * buffers = 0;
682   u32 n_alloc = 0;
683   vlib_buffer_t * b0;
684   int i;
685   u8 dst_mac_address[6];
686   i16 header_size;
687   ethernet_arp_header_t * arp0;
688
689   /* Save the dst mac address */
690   memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
691
692   /* Figure out which sw_if_index supplied the address */
693   unnum_src_sw_if_index = ifa0->sw_if_index;
694
695   /* Track down all users of the unnumbered source */
696   pool_foreach (si, vim->sw_interfaces, 
697   ({
698     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
699         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
700       {
701         vec_add1 (broadcast_swifs, si->sw_if_index);
702       }
703   }));
704           
705
706   ASSERT (vec_len(broadcast_swifs));
707
708   /* Allocate buffering if we need it */
709   if (vec_len(broadcast_swifs) > 1)
710     {
711       vec_validate (buffers, vec_len(broadcast_swifs)-2);
712       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers));
713       _vec_len (buffers) = n_alloc;
714       for (i = 0; i < n_alloc; i++)
715         {
716           b0 = vlib_get_buffer (vm, buffers[i]);
717
718           /* xerox (partially built) ARP pkt */
719           memcpy (b0->data, p0->data, p0->current_length + p0->current_data);
720           b0->current_data = p0->current_data;
721           b0->current_length = p0->current_length;
722           vnet_buffer(b0)->sw_if_index[VLIB_RX] =
723             vnet_buffer(p0)->sw_if_index[VLIB_RX];
724         }
725     }
726
727   vec_insert (buffers, 1, 0);
728   buffers[0] = pi0;
729   
730   for (i = 0; i < vec_len(buffers); i++)
731     {
732       b0 = vlib_get_buffer(vm, buffers[i]);
733       arp0 = vlib_buffer_get_current (b0);
734
735       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
736       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
737
738       /* For decoration, most likely */
739       vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
740
741       /* Fix ARP pkt src address */
742       memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
743
744       /* Build L2 encaps for this swif */
745       header_size = sizeof (ethernet_header_t);
746       if (si->sub.eth.flags.one_tag) 
747         header_size += 4;
748       else if (si->sub.eth.flags.two_tags)
749         header_size += 8;
750       
751       vlib_buffer_advance (b0, -header_size);
752       eth0 = vlib_buffer_get_current (b0);
753
754       if (si->sub.eth.flags.one_tag) {
755         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
756         
757         eth0->type = si->sub.eth.flags.dot1ad ?
758           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
759           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
760         outer->priority_cfi_and_id = 
761           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
762         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
763         
764       } else if (si->sub.eth.flags.two_tags) {
765         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
766         ethernet_vlan_header_t * inner = (void *) (outer + 1);
767         
768         eth0->type = si->sub.eth.flags.dot1ad ?
769           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
770           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
771         outer->priority_cfi_and_id = 
772           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
773         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
774         inner->priority_cfi_and_id = 
775           clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
776         inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
777         
778       } else {
779         eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
780       }
781       
782       /* Restore the original dst address, set src address */
783       memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address));
784       memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address));
785       
786       /* Transmit replicas */
787       if (i > 0)
788         {
789           vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
790           u32 * to_next = vlib_frame_vector_args (f);
791           to_next[0] = buffers[i];
792           f->n_vectors = 1;
793           vlib_put_frame_to_node (vm, hi->output_node_index, f);
794         }
795     }
796
797   hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[0]);
798
799   vec_free (broadcast_swifs);
800   vec_free (buffers);
801
802   /* The regular path outputs the original pkt.. */
803   return vec_elt (am->arp_input_next_index_by_hw_if_index, hi->hw_if_index);
804 }
805
806 static uword
807 arp_input (vlib_main_t * vm,
808            vlib_node_runtime_t * node,
809            vlib_frame_t * frame)
810 {
811   ethernet_arp_main_t * am = &ethernet_arp_main;
812   vnet_main_t * vnm = vnet_get_main();
813   ip4_main_t * im4 = &ip4_main;
814   u32 n_left_from, next_index, * from, * to_next;
815   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
816
817   from = vlib_frame_vector_args (frame);
818   n_left_from = frame->n_vectors;
819   next_index = node->cached_next_index;
820
821   if (node->flags & VLIB_NODE_FLAG_TRACE)
822     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
823                                    /* stride */ 1,
824                                    sizeof (ethernet_arp_input_trace_t));
825
826   while (n_left_from > 0)
827     {
828       u32 n_left_to_next;
829
830       vlib_get_next_frame (vm, node, next_index,
831                            to_next, n_left_to_next);
832
833       while (n_left_from > 0 && n_left_to_next > 0)
834         {
835           vlib_buffer_t * p0;
836           vnet_hw_interface_t * hw_if0;
837           ethernet_arp_header_t * arp0;
838           ethernet_header_t * eth0;
839           ip_interface_address_t * ifa0;
840           ip_adjacency_t * adj0;
841           ip4_address_t * if_addr0;
842           ip4_address_t proxy_src;
843           u32 pi0, error0, next0, sw_if_index0;
844           u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
845           ethernet_proxy_arp_t * pa;
846
847           pi0 = from[0];
848           to_next[0] = pi0;
849           from += 1;
850           to_next += 1;
851           n_left_from -= 1;
852           n_left_to_next -= 1;
853
854           p0 = vlib_get_buffer (vm, pi0);
855           arp0 = vlib_buffer_get_current (p0);
856
857           is_request0 = arp0->opcode 
858               == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
859
860           error0 = ETHERNET_ARP_ERROR_replies_sent;
861
862           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
863                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
864                     : error0);
865           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
866                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
867                     : error0);
868
869           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
870
871           if (error0)
872             goto drop1;
873
874           /* Check that IP address is local and matches incoming interface. */
875           if_addr0 = ip4_interface_address_matching_destination (im4,
876                                                                  &arp0->ip4_over_ethernet[1].ip4,
877                                                                  sw_if_index0,
878                                                                  &ifa0);
879           if (! if_addr0)
880             {
881               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
882               goto drop1;
883             }
884
885           /* Honor unnumbered interface, if any */
886           is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
887
888           /* Source must also be local to subnet of matching interface address. */
889           if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
890             {
891               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
892               goto drop1;
893             }
894
895           /* Reject requests/replies with our local interface address. */
896           src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
897           if (src_is_local0)
898             {
899               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
900               goto drop1;
901             }
902
903           dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
904
905           /* Fill in ethernet header. */
906           eth0 = ethernet_buffer_get_header (p0);
907
908           /* Trash ARP packets whose ARP-level source addresses do not
909              match their L2-frame-level source addresses */
910           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
911                       sizeof (eth0->src_address)))
912             {
913               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
914               goto drop2;
915             }
916
917           /* Learn or update sender's mapping only for requests or unicasts
918              that don't match local interface address. */
919           if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST
920               || is_request0)
921             {
922               if (am->limit_arp_cache_size && 
923                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
924                 unset_random_arp_entry();
925
926               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, 
927                                               (u32)~0 /* default fib */,
928                                               &arp0->ip4_over_ethernet[0], 
929                                               0 /* is_static */);
930               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
931              }
932
933           /* Only send a reply for requests sent which match a local interface. */
934           if (! (is_request0 && dst_is_local0))
935             {
936               error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)
937                         ? ETHERNET_ARP_ERROR_replies_received : error0);
938               goto drop1;
939             }
940
941           /* Send a reply. */
942         send_reply:
943           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
944           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
945
946           /* Can happen in a multi-core env. */
947           if (PREDICT_FALSE(hw_if0->hw_if_index >= vec_len (am->arp_input_next_index_by_hw_if_index)))
948             {
949               error0 = ETHERNET_ARP_ERROR_missing_interface_address;
950               goto drop2;
951             }
952
953           next0 = vec_elt (am->arp_input_next_index_by_hw_if_index, hw_if0->hw_if_index);
954
955           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
956
957           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
958
959           memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6);
960           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32;
961
962           /* Hardware must be ethernet-like. */
963           ASSERT (vec_len (hw_if0->hw_address) == 6);
964
965           memcpy (eth0->dst_address, eth0->src_address, 6);
966           memcpy (eth0->src_address, hw_if0->hw_address, 6);
967
968           /* Figure out how much to rewind current data from adjacency. */
969           if (ifa0)
970             {
971               adj0 = ip_get_adjacency (&ip4_main.lookup_main, 
972                                        ifa0->neighbor_probe_adj_index);
973               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
974                 {
975                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
976                   goto drop2;
977                 }
978               if (is_unnum0)
979                 next0 = arp_unnumbered (p0, pi0, eth0, ifa0);
980               else
981                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
982             }
983
984           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
985                                            n_left_to_next,pi0,next0);
986
987           n_replies_sent += 1;
988           continue;
989
990         drop1:
991           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
992               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
993                arp0->ip4_over_ethernet[1].ip4.as_u32))
994             {
995               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
996               goto drop2;
997             }
998           /* See if proxy arp is configured for the address */
999           if (is_request0) 
1000             {
1001               vnet_sw_interface_t * si;
1002               u32 this_addr = clib_net_to_host_u32 
1003                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1004               u32 fib_index0;
1005
1006               si = vnet_get_sw_interface (vnm, sw_if_index0);
1007               
1008               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1009                 goto drop2;
1010
1011               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, 
1012                                     sw_if_index0);
1013
1014               vec_foreach (pa, am->proxy_arps)
1015                 {
1016                   u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1017                   u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1018
1019                    /* an ARP request hit in the proxy-arp table? */
1020                    if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1021                        (fib_index0 == pa->fib_index))
1022                     {
1023                       eth0 = ethernet_buffer_get_header (p0);
1024                       proxy_src.as_u32 = 
1025                         arp0->ip4_over_ethernet[1].ip4.data_u32;
1026
1027                       /* 
1028                        * Rewind buffer, direct code above not to
1029                        * think too hard about it. 
1030                        * $$$ is the answer ever anything other than
1031                        * vlib_buffer_reset(..)?
1032                        */
1033                       ifa0 = 0;
1034                       if_addr0 = &proxy_src;
1035                       vlib_buffer_reset (p0);
1036                       n_proxy_arp_replies_sent++;
1037                       goto send_reply;
1038                     }
1039                 }
1040             }
1041           
1042         drop2:
1043
1044           next0 = ARP_INPUT_NEXT_DROP;
1045           p0->error = node->errors[error0];
1046
1047           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1048                                            n_left_to_next,pi0,next0);
1049         }
1050
1051       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1052     }
1053
1054   vlib_error_count (vm, node->node_index,
1055                     ETHERNET_ARP_ERROR_replies_sent, 
1056                     n_replies_sent - n_proxy_arp_replies_sent);
1057   
1058   vlib_error_count (vm, node->node_index,
1059                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent, 
1060                     n_proxy_arp_replies_sent);
1061   return frame->n_vectors;
1062 }
1063
1064 static char * ethernet_arp_error_strings[] = {
1065 #define _(sym,string) string,
1066   foreach_ethernet_arp_error
1067 #undef _
1068 };
1069
1070 VLIB_REGISTER_NODE (arp_input_node,static) = {
1071   .function = arp_input,
1072   .name = "arp-input",
1073   .vector_size = sizeof (u32),
1074
1075   .n_errors = ETHERNET_ARP_N_ERROR,
1076   .error_strings = ethernet_arp_error_strings,
1077
1078   .n_next_nodes = ARP_INPUT_N_NEXT,
1079   .next_nodes = {
1080     [ARP_INPUT_NEXT_DROP] = "error-drop",
1081   },
1082
1083   .format_buffer = format_ethernet_arp_header,
1084   .format_trace = format_ethernet_arp_input_trace,
1085 };
1086
1087 clib_error_t *
1088 ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
1089                                         u32 hw_if_index,
1090                                         u32 flags)
1091 {
1092   ethernet_arp_main_t * am = &ethernet_arp_main;
1093   vnet_hw_interface_t * hw_if;
1094
1095   hw_if = vnet_get_hw_interface (vnm, hw_if_index);
1096
1097   /* Fill in lookup tables with default table (0). */
1098   vec_validate_init_empty (am->arp_input_next_index_by_hw_if_index, hw_if_index, ~0);
1099   am->arp_input_next_index_by_hw_if_index[hw_if_index]
1100     = vlib_node_add_next (vnm->vlib_main, arp_input_node.index, hw_if->output_node_index);
1101
1102   return 0;
1103 }
1104
1105 VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (ethernet_arp_hw_interface_link_up_down);
1106
1107 static int
1108 ip4_arp_entry_sort (void *a1, void *a2)
1109 {
1110   ethernet_arp_ip4_entry_t * e1 = a1;
1111   ethernet_arp_ip4_entry_t * e2 = a2;
1112
1113   int cmp;
1114   vnet_main_t * vnm = vnet_get_main();
1115
1116   cmp = vnet_sw_interface_compare 
1117     (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
1118   if (! cmp)
1119     cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
1120   return cmp;
1121 }
1122
1123 static clib_error_t *
1124 show_ip4_arp (vlib_main_t * vm,
1125               unformat_input_t * input,
1126               vlib_cli_command_t * cmd)
1127 {
1128   vnet_main_t * vnm = vnet_get_main();
1129   ethernet_arp_main_t * am = &ethernet_arp_main;
1130   ethernet_arp_ip4_entry_t * e, * es;
1131   ethernet_proxy_arp_t * pa;
1132   clib_error_t * error = 0;
1133   u32 sw_if_index;
1134
1135   /* Filter entries by interface if given. */
1136   sw_if_index = ~0;
1137   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1138
1139   es = 0;
1140   pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); }));
1141   if ( es )
1142     {
1143       vec_sort_with_function (es, ip4_arp_entry_sort);
1144       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1145       vec_foreach (e, es) {
1146         if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
1147           continue;
1148         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1149       }
1150       vec_free (es);
1151     }
1152
1153   if (vec_len (am->proxy_arps))
1154     {
1155       vlib_cli_output (vm, "Proxy arps enabled for:");
1156       vec_foreach(pa, am->proxy_arps)
1157         {
1158           vlib_cli_output (vm, "Fib_index %d   %U - %U ", 
1159                            pa->fib_index,
1160                            format_ip4_address, &pa->lo_addr, 
1161                            format_ip4_address, &pa->hi_addr);
1162         }
1163     }
1164       
1165   return error;
1166 }
1167
1168 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1169   .path = "show ip arp",
1170   .function = show_ip4_arp,
1171   .short_help = "Show ARP table",
1172 };
1173
1174 typedef struct {
1175   pg_edit_t l2_type, l3_type;
1176   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1177   pg_edit_t opcode;
1178   struct {
1179     pg_edit_t ethernet;
1180     pg_edit_t ip4;
1181   } ip4_over_ethernet[2];
1182 } pg_ethernet_arp_header_t;
1183
1184 static inline void
1185 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1186 {
1187   /* Initialize fields that are not bit fields in the IP header. */
1188 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1189   _ (l2_type);
1190   _ (l3_type);
1191   _ (n_l2_address_bytes);
1192   _ (n_l3_address_bytes);
1193   _ (opcode);
1194   _ (ip4_over_ethernet[0].ethernet);
1195   _ (ip4_over_ethernet[0].ip4);
1196   _ (ip4_over_ethernet[1].ethernet);
1197   _ (ip4_over_ethernet[1].ip4);
1198 #undef _
1199 }
1200
1201 uword
1202 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1203 {
1204   pg_stream_t * s = va_arg (*args, pg_stream_t *);
1205   pg_ethernet_arp_header_t * p;
1206   u32 group_index;
1207   
1208   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1209                             &group_index);
1210   pg_ethernet_arp_header_init (p);
1211
1212   /* Defaults. */
1213   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1214   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1215   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1216   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1217
1218   if (! unformat (input, "%U: %U/%U -> %U/%U",
1219                   unformat_pg_edit,
1220                   unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1221                   unformat_pg_edit,
1222                   unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1223                   unformat_pg_edit,
1224                   unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1225                   unformat_pg_edit,
1226                   unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1227                   unformat_pg_edit,
1228                   unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1229     {
1230       /* Free up any edits we may have added. */
1231       pg_free_edit_group (s);
1232       return 0;
1233     }
1234   return 1;
1235 }
1236
1237 clib_error_t *ip4_set_arp_limit (u32 arp_limit)
1238 {
1239   ethernet_arp_main_t * am = &ethernet_arp_main;
1240
1241   am->limit_arp_cache_size = arp_limit;
1242   return 0;
1243 }
1244
1245 static clib_error_t * ethernet_arp_init (vlib_main_t * vm)
1246 {
1247   ethernet_arp_main_t * am = &ethernet_arp_main;
1248   pg_node_t * pn;
1249
1250   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1251
1252   pn = pg_get_node (arp_input_node.index);
1253   pn->unformat_edit = unformat_pg_arp_header;
1254
1255   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1256 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1257   foreach_ethernet_arp_opcode;
1258 #undef _
1259
1260   mhash_init (&am->ip4_entry_by_key,
1261               /* value size */ sizeof (uword),
1262               /* key size */ sizeof (ethernet_arp_ip4_key_t));
1263
1264   /* $$$ configurable */
1265   am->limit_arp_cache_size = 50000;
1266
1267   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1268   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1269
1270   /* don't trace ARP error packets */
1271   {
1272     vlib_node_runtime_t *rt = 
1273       vlib_node_get_runtime (vm, arp_input_node.index);
1274
1275 #define _(a,b)                                  \
1276     vnet_pcap_drop_trace_filter_add_del         \
1277         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1278          1 /* is_add */);
1279     foreach_ethernet_arp_error
1280 #undef _
1281   }
1282   
1283   return 0;
1284 }
1285
1286 VLIB_INIT_FUNCTION (ethernet_arp_init);
1287
1288 int 
1289 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1290                                   u32 sw_if_index, u32 fib_index,
1291                                   void * a_arg)
1292 {
1293   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1294   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1295
1296   args.sw_if_index = sw_if_index;
1297   args.fib_index = fib_index;
1298   args.is_remove = 1;
1299   memcpy (&args.a, a, sizeof (*a));
1300
1301   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
1302                                (u8 *) &args, sizeof (args));
1303   return 0;
1304 }
1305
1306 static inline int 
1307 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1308                                            u32 sw_if_index, 
1309                                            u32 fib_index,
1310                                            void * a_arg)
1311 {
1312   ethernet_arp_ip4_entry_t * e;
1313   ethernet_arp_main_t * am = &ethernet_arp_main;
1314   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1315   ethernet_arp_ip4_key_t k;
1316   uword * p;
1317   ip4_add_del_route_args_t args;
1318   ip4_main_t * im = &ip4_main;
1319   ip_lookup_main_t * lm = &im->lookup_main;
1320   u32 adj_index;
1321   ip_adjacency_t * adj;
1322
1323   k.sw_if_index = sw_if_index;
1324   k.ip4_address = a->ip4;
1325   k.fib_index = fib_index;
1326   p = mhash_get (&am->ip4_entry_by_key, &k);
1327   if (! p)
1328     return -1;
1329
1330   memset(&args, 0, sizeof(args));
1331
1332   /* 
1333    * Make sure that the route actually exists before we try to delete it,
1334    * and make sure that it's a rewrite adjacency.
1335    *
1336    * If we point 1-N unnumbered interfaces at a loopback interface and 
1337    * shut down the loopback before shutting down 1-N unnumbered 
1338    * interfaces, the ARP cache will still have an entry, 
1339    * but the route will have disappeared.
1340    * 
1341    * See also ip4_del_interface_routes (...) 
1342    *            -> ip4_delete_matching_routes (...).
1343    */
1344   
1345   adj_index = ip4_fib_lookup_with_table 
1346       (im, fib_index, &a->ip4, 1 /* disable default route */);
1347
1348   /* Miss adj? Forget it... */
1349   if (adj_index != lm->miss_adj_index) {
1350       adj = ip_get_adjacency (lm, adj_index);
1351       /* 
1352        * Stupid control-plane trick:
1353        * admin down an interface (removes arp routes from fib),
1354        * bring the interface back up (does not reinstall them)
1355        * then remove the arp cache entry (yuck). When that happens,
1356        * the adj we find here will be the interface subnet ARP adj.
1357        */
1358       if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) {
1359           args.table_index_or_table_id = fib_index;
1360           args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL 
1361               | IP4_ROUTE_FLAG_NEIGHBOR;
1362           args.dst_address = a->ip4;
1363           args.dst_address_length = 32;
1364           ip4_add_del_route (im, &args);
1365           ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
1366       }
1367   }
1368
1369   e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1370   mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
1371   pool_put (am->ip4_entry_pool, e);
1372   return 0;
1373 }
1374
1375 static void 
1376 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a)
1377 {
1378   u8 old;
1379   int i;
1380
1381   for (i = 3; i >= 0; i--) 
1382     {
1383       old = a->ip4.as_u8[i];
1384       a->ip4.as_u8[i] += 1;
1385       if (old < a->ip4.as_u8[i])
1386         break;
1387     }
1388
1389   for (i = 5; i >= 0; i--)
1390     {
1391       old = a->ethernet[i];
1392       a->ethernet[i] += 1;
1393       if (old < a->ethernet[i])
1394         break;
1395     }
1396 }
1397
1398 int vnet_proxy_arp_add_del (ip4_address_t *lo_addr,
1399                             ip4_address_t *hi_addr,
1400                             u32 fib_index, int is_del)
1401 {
1402   ethernet_arp_main_t *am = &ethernet_arp_main;
1403   ethernet_proxy_arp_t *pa;
1404   u32 found_at_index = ~0;
1405
1406   vec_foreach (pa, am->proxy_arps)
1407     {
1408       if (pa->lo_addr == lo_addr->as_u32 
1409           && pa->hi_addr == hi_addr->as_u32
1410           && pa->fib_index == fib_index)
1411         {
1412           found_at_index = pa - am->proxy_arps;
1413           break;
1414         }
1415     }
1416
1417   if (found_at_index != ~0)
1418     {
1419       /* Delete, otherwise it's already in the table */
1420       if (is_del)
1421         vec_delete (am->proxy_arps, 1, found_at_index);
1422       return 0;
1423     }
1424   /* delete, no such entry */
1425   if (is_del)
1426     return VNET_API_ERROR_NO_SUCH_ENTRY;
1427
1428   /* add, not in table */
1429   vec_add2 (am->proxy_arps, pa, 1);
1430   pa->lo_addr = lo_addr->as_u32;
1431   pa->hi_addr = hi_addr->as_u32;
1432   pa->fib_index = fib_index;
1433   return 0;
1434 }
1435
1436 /*
1437  * Remove any proxy arp entries asdociated with the 
1438  * specificed fib.
1439  */
1440 int vnet_proxy_arp_fib_reset (u32 fib_id)
1441 {
1442   ip4_main_t * im = &ip4_main;
1443   ethernet_arp_main_t *am = &ethernet_arp_main;
1444   ethernet_proxy_arp_t *pa;
1445   u32 * entries_to_delete = 0;
1446   u32 fib_index;
1447   uword * p;
1448   int i;
1449
1450   p = hash_get (im->fib_index_by_table_id, fib_id);
1451   if (! p)
1452       return VNET_API_ERROR_NO_SUCH_ENTRY;
1453   fib_index = p[0];
1454
1455   vec_foreach (pa, am->proxy_arps)
1456     {
1457       if (pa->fib_index == fib_index)
1458         {
1459           vec_add1 (entries_to_delete, pa - am->proxy_arps);
1460         }
1461     }
1462
1463   for (i = 0; i < vec_len(entries_to_delete); i++)
1464     {
1465        vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1466     } 
1467
1468   vec_free (entries_to_delete);
1469
1470    return 0;
1471 }
1472
1473 static clib_error_t *
1474 ip_arp_add_del_command_fn (vlib_main_t * vm,
1475                  unformat_input_t * input,
1476                  vlib_cli_command_t * cmd)
1477 {
1478   vnet_main_t * vnm = vnet_get_main();
1479   u32 sw_if_index;
1480   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1481   int addr_valid = 0;
1482   int is_del = 0;
1483   int count = 1;
1484   u32 fib_index = 0;
1485   u32 fib_id;
1486   int is_static = 0;
1487   int is_proxy = 0;
1488
1489   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1490     {
1491       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1492       if (unformat (input, "%U %U %U",
1493                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1494                     unformat_ip4_address, &addr.ip4, 
1495                     unformat_ethernet_address, &addr.ethernet))
1496         addr_valid = 1;
1497
1498       else if (unformat (input, "delete") || unformat (input, "del"))
1499         is_del = 1;
1500
1501       else if (unformat (input, "static"))
1502         is_static = 1;
1503
1504       else if (unformat (input, "count %d", &count))
1505         ;
1506
1507       else if (unformat (input, "fib-id %d", &fib_id))
1508         {
1509           ip4_main_t * im = &ip4_main;
1510           uword * p = hash_get (im->fib_index_by_table_id, fib_id);
1511           if (! p)
1512             return clib_error_return (0, "fib ID %d doesn't exist\n",
1513                                       fib_id);
1514           fib_index = p[0];
1515         }
1516
1517       else if (unformat (input, "proxy %U - %U", 
1518                          unformat_ip4_address, &lo_addr.ip4, 
1519                          unformat_ip4_address, &hi_addr.ip4))
1520         is_proxy = 1;
1521       else
1522         break;
1523     }
1524   
1525   if (is_proxy)
1526     {
1527       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, 
1528                                      fib_index, is_del);
1529       return 0;
1530     }
1531
1532   if (addr_valid) 
1533     {
1534       int i;
1535
1536       for (i = 0; i < count; i++) 
1537         {
1538           if (is_del == 0) 
1539             {
1540               uword event_type, * event_data = 0;
1541
1542               /* Park the debug CLI until the arp entry is installed */
1543               vnet_register_ip4_arp_resolution_event 
1544                 (vnm, &addr.ip4, vlib_current_process(vm),
1545                  1 /* type */, 0 /* data */);
1546               
1547               vnet_arp_set_ip4_over_ethernet
1548                 (vnm, sw_if_index, fib_index, &addr, is_static);
1549               
1550               vlib_process_wait_for_event (vm);
1551               event_type = vlib_process_get_events (vm, &event_data);
1552               vec_reset_length(event_data);
1553               if (event_type != 1)
1554                 clib_warning ("event type %d unexpected", event_type);
1555             }
1556           else
1557             vnet_arp_unset_ip4_over_ethernet
1558                 (vnm, sw_if_index, fib_index, &addr);
1559
1560           increment_ip4_and_mac_address (&addr);
1561         }
1562     }
1563   else
1564     {
1565       return clib_error_return (0, "unknown input `%U'",
1566                                 format_unformat_error, input);
1567     }
1568   
1569   return 0;
1570 }
1571
1572 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1573     .path = "set ip arp",
1574     .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address>",
1575     .function = ip_arp_add_del_command_fn,
1576 };
1577
1578 static clib_error_t *
1579 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1580                               unformat_input_t * input,
1581                               vlib_cli_command_t * cmd)
1582 {
1583   vnet_main_t * vnm = vnet_get_main();
1584   u32 sw_if_index;
1585   vnet_sw_interface_t * si;
1586   int enable = 0;
1587   int intfc_set = 0;
1588
1589   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1590     {
1591       if (unformat (input, "%U", unformat_vnet_sw_interface, 
1592                     vnm, &sw_if_index))
1593         intfc_set = 1;
1594       else if (unformat (input, "enable") || unformat (input, "on"))
1595         enable = 1;
1596       else if (unformat (input, "disable") || unformat (input, "off"))
1597         enable = 0;
1598       else
1599         break;
1600     }
1601
1602   if (intfc_set == 0)
1603     return clib_error_return (0, "unknown input '%U'",
1604                               format_unformat_error, input);
1605
1606   si = vnet_get_sw_interface (vnm, sw_if_index);
1607   ASSERT(si);
1608   if (enable)
1609     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1610   else 
1611     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1612   
1613   return 0;
1614 }
1615
1616 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
1617     .path = "set interface proxy-arp",
1618     .short_help = "set interface proxy-arp <intfc> [enable|disable]",
1619     .function = set_int_proxy_arp_command_fn,
1620 };
1621
1622
1623 /*
1624  * ARP Termination in a L2 Bridge Domain based on an
1625  * IP4 to MAC hash table mac_by_ip4 for each BD.
1626  */
1627 typedef enum {
1628   ARP_TERM_NEXT_L2_OUTPUT,
1629   ARP_TERM_NEXT_DROP,
1630   ARP_TERM_N_NEXT,
1631 } arp_term_next_t;
1632
1633 u32 arp_term_next_node_index[32];
1634
1635 static uword
1636 arp_term_l2bd (vlib_main_t * vm,
1637                vlib_node_runtime_t * node,
1638                vlib_frame_t * frame)
1639 {
1640   l2input_main_t * l2im = &l2input_main;
1641   u32 n_left_from, next_index, * from, * to_next;
1642   u32 n_replies_sent = 0;
1643   u16 last_bd_index = ~0;
1644   l2_bridge_domain_t * last_bd_config = 0;
1645   l2_input_config_t * cfg0;
1646
1647   from = vlib_frame_vector_args (frame);
1648   n_left_from = frame->n_vectors;
1649   next_index = node->cached_next_index;
1650
1651   while (n_left_from > 0)
1652     {
1653       u32 n_left_to_next;
1654
1655       vlib_get_next_frame (vm, node, next_index,
1656                            to_next, n_left_to_next);
1657
1658       while (n_left_from > 0 && n_left_to_next > 0)
1659         {
1660           vlib_buffer_t * p0;
1661           ethernet_header_t * eth0;
1662           ethernet_arp_header_t * arp0;
1663           u8 * l3h0;
1664           u32 pi0, error0, next0, sw_if_index0;
1665           u16 ethertype0;
1666           u16 bd_index0;
1667           u32 ip0;
1668           u8 * macp0;
1669
1670           pi0 = from[0];
1671           to_next[0] = pi0;
1672           from += 1;
1673           to_next += 1;
1674           n_left_from -= 1;
1675           n_left_to_next -= 1;
1676
1677           p0 = vlib_get_buffer (vm, pi0);
1678           eth0 = vlib_buffer_get_current (p0);
1679           l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len;
1680           ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2));
1681           arp0 = (ethernet_arp_header_t *) l3h0;
1682
1683           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
1684                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
1685             {
1686               u8 *t0 = vlib_add_trace (
1687                   vm, node, p0, sizeof(ethernet_arp_input_trace_t));
1688               memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t));
1689             }
1690
1691           if (PREDICT_FALSE  (
1692             (ethertype0 != ETHERNET_TYPE_ARP) ||
1693             (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request))))
1694             goto next_l2_feature;
1695
1696           error0 = ETHERNET_ARP_ERROR_replies_sent;
1697           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
1698                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
1699                     : error0);
1700           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
1701                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
1702                     : error0);
1703
1704           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1705
1706           if (error0)
1707             goto drop;
1708
1709           // Trash ARP packets whose ARP-level source addresses do not
1710           // match their L2-frame-level source addresses */
1711           if (PREDICT_FALSE (
1712             memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1713                     sizeof (eth0->src_address))))
1714             {
1715               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1716               goto drop;
1717             }
1718
1719           // Check if anyone want ARP request events for L2 BDs
1720           {
1721           pending_resolution_t * mc;
1722           ethernet_arp_main_t * am = &ethernet_arp_main;
1723           uword *p = hash_get (am->mac_changes_by_address, 0);
1724           if (p && (vnet_buffer(p0)->l2.shg == 0))
1725             { // Only SHG 0 interface which is more likely local
1726               u32 next_index = p[0];
1727               while (next_index != (u32)~0)
1728                 {
1729                   int (*fp)(u32, u8 *, u32, u32);
1730                   int rv = 1;
1731                   mc = pool_elt_at_index (am->mac_changes, next_index);
1732                   fp = mc->data_callback;
1733                   // Call the callback, return 1 to suppress dup events */
1734                   if (fp) rv = (*fp)(mc->data, 
1735                                      arp0->ip4_over_ethernet[0].ethernet, 
1736                                      sw_if_index0, 
1737                                      arp0->ip4_over_ethernet[0].ip4.as_u32);
1738                   // Signal the resolver process
1739                   if (rv == 0)
1740                     vlib_process_signal_event (vm, mc->node_index,
1741                                                mc->type_opaque, 
1742                                                mc->data);
1743                   next_index = mc->next_index;
1744                 }
1745             }
1746           }
1747
1748           // lookup BD mac_by_ip4 hash table for MAC entry
1749           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
1750           bd_index0 = vnet_buffer(p0)->l2.bd_index;
1751           if (PREDICT_FALSE (
1752             (bd_index0 != last_bd_index) || (last_bd_index == (u16) ~0)))
1753             {
1754               last_bd_index = bd_index0;
1755               last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0);
1756             }
1757           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
1758
1759           if (PREDICT_FALSE(!macp0)) 
1760               goto next_l2_feature;     // MAC not found 
1761
1762           // MAC found, send ARP reply -
1763           // Convert ARP request packet to ARP reply
1764           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1765           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1766           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
1767           memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
1768           memcpy (eth0->dst_address, eth0->src_address, 6);
1769           memcpy (eth0->src_address, macp0, 6);
1770           n_replies_sent += 1;
1771
1772           // For BVI, need to use l2-fwd node to send ARP reply as 
1773           // l2-output node cannot output packet to BVI properly
1774           cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0);
1775           if (PREDICT_FALSE (cfg0->bvi))
1776             {
1777               vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
1778               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
1779               goto next_l2_feature;
1780             }
1781
1782           // Send ARP reply back out input interface through l2-output
1783           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1784           next0 = ARP_TERM_NEXT_L2_OUTPUT;
1785           // Note that output to VXLAN tunnel will fail due to SHG which
1786           // is probably desireable since ARP termination is not intended
1787           // for ARP requests from other hosts. If output to VXLAN tunnel is
1788           // required, however, can just clear the SHG in packet as follows:
1789           //   vnet_buffer(p0)->l2.shg = 0;
1790
1791           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1792                                            n_left_to_next,pi0,next0);
1793           continue;
1794
1795         next_l2_feature:
1796           {
1797             u32 feature_bitmap0 =
1798                 vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
1799             vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0;
1800             next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index,
1801                                                     feature_bitmap0);
1802             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1803                                              n_left_to_next,pi0,next0);
1804             continue;
1805           }
1806
1807         drop:
1808           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1809               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1810                arp0->ip4_over_ethernet[1].ip4.as_u32))
1811             {
1812               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1813             }
1814           next0 = ARP_TERM_NEXT_DROP;
1815           p0->error = node->errors[error0];
1816
1817           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1818                                            n_left_to_next,pi0,next0);
1819         }
1820
1821       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1822     }
1823
1824   vlib_error_count (vm, node->node_index,
1825                     ETHERNET_ARP_ERROR_replies_sent, 
1826                     n_replies_sent);
1827   return frame->n_vectors;
1828 }
1829
1830 VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = {
1831   .function = arp_term_l2bd,
1832   .name = "arp-term-l2bd",
1833   .vector_size = sizeof (u32),
1834
1835   .n_errors = ETHERNET_ARP_N_ERROR,
1836   .error_strings = ethernet_arp_error_strings,
1837
1838   .n_next_nodes = ARP_TERM_N_NEXT,
1839   .next_nodes = {
1840     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
1841     [ARP_TERM_NEXT_DROP] = "error-drop",
1842   },
1843
1844   .format_buffer = format_ethernet_arp_header,
1845   .format_trace = format_ethernet_arp_input_trace,
1846 };
1847
1848 clib_error_t *arp_term_init (vlib_main_t *vm)
1849 { // Initialize the feature next-node indexes 
1850   feat_bitmap_init_next_nodes(vm,
1851                               arp_term_l2bd_node.index,
1852                               L2INPUT_N_FEAT,
1853                               l2input_get_feat_names(),
1854                               arp_term_next_node_index);
1855   return 0;
1856 }
1857
1858 VLIB_INIT_FUNCTION (arp_term_init);