VPP-182: IPv4 static ARP entries should not be deleted on interface down
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ethernet/ethernet.h>
20 #include <vnet/ethernet/arp_packet.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vppinfra/mhash.h>
23
24 void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
25
26 typedef struct {
27   u32 sw_if_index;
28   u32 fib_index;
29   ip4_address_t ip4_address;
30 } ethernet_arp_ip4_key_t;
31
32 typedef struct {
33   ethernet_arp_ip4_key_t key;
34   u8 ethernet_address[6];
35
36   u16 flags;
37 #define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
38 #define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN  (2 << 0)
39
40   u64 cpu_time_last_updated;
41
42   u32 * adjacencies;
43 } ethernet_arp_ip4_entry_t;
44
45 typedef struct {
46   u32 lo_addr;
47   u32 hi_addr;
48   u32 fib_index;
49 } ethernet_proxy_arp_t;
50
51 typedef struct {
52   u32 next_index;
53   uword node_index;
54   uword type_opaque;
55   uword data;
56   /* Used for arp event notification only */
57   void * data_callback;
58   u32 pid;
59 } pending_resolution_t;
60
61 typedef struct {
62   /* Hash tables mapping name to opcode. */
63   uword * opcode_by_name;
64
65   /* lite beer "glean" adjacency handling */
66   uword * pending_resolutions_by_address;
67   pending_resolution_t * pending_resolutions;
68
69   /* Mac address change notification */
70   uword * mac_changes_by_address;
71   pending_resolution_t * mac_changes;
72
73   ethernet_arp_ip4_entry_t * ip4_entry_pool;
74
75   mhash_t ip4_entry_by_key;
76     
77   /* ARP attack mitigation */
78   u32 arp_delete_rotor;
79   u32 limit_arp_cache_size;
80
81   /* Proxy arp vector */
82   ethernet_proxy_arp_t * proxy_arps;
83 } ethernet_arp_main_t;
84
85 static ethernet_arp_main_t ethernet_arp_main;
86
87 static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va)
88 {
89   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
90   char * t = 0;
91   switch (h)
92     {
93 #define _(n,f) case n: t = #f; break;
94       foreach_ethernet_arp_hardware_type;
95 #undef _
96
97     default:
98       return format (s, "unknown 0x%x", h);
99     }
100
101   return format (s, "%s", t);
102 }
103
104 static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va)
105 {
106   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
107   char * t = 0;
108   switch (o)
109     {
110 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
111       foreach_ethernet_arp_opcode;
112 #undef _
113
114     default:
115       return format (s, "unknown 0x%x", o);
116     }
117
118   return format (s, "%s", t);
119 }
120
121 static uword
122 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
123                                               va_list * args)
124 {
125   int * result = va_arg (*args, int *);
126   ethernet_arp_main_t * am = &ethernet_arp_main;
127   int x, i;
128
129   /* Numeric opcode. */
130   if (unformat (input, "0x%x", &x)
131       || unformat (input, "%d", &x))
132     {
133       if (x >= (1 << 16))
134         return 0;
135       *result = x;
136       return 1;
137     }
138
139   /* Named type. */
140   if (unformat_user (input, unformat_vlib_number_by_name,
141                      am->opcode_by_name, &i))
142     {
143       *result = i;
144       return 1;
145     }
146
147   return 0;
148 }
149
150 static uword
151 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
152                                              va_list * args)
153 {
154   int * result = va_arg (*args, int *);
155   if (! unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result))
156     return 0;
157
158   *result = clib_host_to_net_u16 ((u16) *result);
159   return 1;
160 }
161
162 static u8 * format_ethernet_arp_header (u8 * s, va_list * va)
163 {
164   ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *);
165   u32 max_header_bytes = va_arg (*va, u32);
166   uword indent;
167   u16 l2_type, l3_type;
168
169   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
170     return format (s, "ARP header truncated");
171
172   l2_type = clib_net_to_host_u16 (a->l2_type);
173   l3_type = clib_net_to_host_u16 (a->l3_type);
174
175   indent = format_get_indent (s);
176
177   s = format (s, "%U, type %U/%U, address size %d/%d",
178               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
179               format_ethernet_arp_hardware_type, l2_type,
180               format_ethernet_type, l3_type,
181               a->n_l2_address_bytes, a->n_l3_address_bytes);
182               
183   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
184       && l3_type == ETHERNET_TYPE_IP4)
185     {
186       s = format (s, "\n%U%U/%U -> %U/%U",
187                   format_white_space, indent,
188                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
189                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
190                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
191                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
192     }
193   else
194     {
195       uword n2 = a->n_l2_address_bytes;
196       uword n3 = a->n_l3_address_bytes;
197       s = format (s, "\n%U%U/%U -> %U/%U",
198                   format_white_space, indent,
199                   format_hex_bytes, a->data + 0*n2 + 0*n3, n2,
200                   format_hex_bytes, a->data + 1*n2 + 0*n3, n3,
201                   format_hex_bytes, a->data + 1*n2 + 1*n3, n2,
202                   format_hex_bytes, a->data + 2*n2 + 1*n3, n3);
203     }
204
205   return s;
206 }
207
208 static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
209 {
210   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
211   ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *);
212   vnet_sw_interface_t * si;
213   ip4_fib_t * fib;
214   u8 * flags = 0;
215
216   if (! e)
217     return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4",
218                    "Flags", "Ethernet", "Interface");
219
220   fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
221                                            IP4_ROUTE_FLAG_FIB_INDEX);
222   si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
223
224   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN)
225     flags = format(flags, "G");
226
227   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
228     flags = format(flags, "S");
229
230   s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U",
231               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
232               fib->table_id,
233               format_ip4_address, &e->key.ip4_address,
234               flags ? (char *) flags : "",
235               format_ethernet_address, e->ethernet_address,
236               format_vnet_sw_interface_name, vnm, si);
237
238   vec_free(flags);
239   return s;
240 }
241
242 typedef struct {
243   u8 packet_data[64];
244 } ethernet_arp_input_trace_t;
245
246 static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va)
247 {
248   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
249   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
250   ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *);
251
252   s = format (s, "%U",
253               format_ethernet_arp_header,
254               t->packet_data, sizeof (t->packet_data));
255
256   return s;
257 }
258
259 clib_error_t *
260 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
261                                    u32 sw_if_index,
262                                    u32 flags)
263 {
264   ethernet_arp_main_t * am = &ethernet_arp_main;
265   ethernet_arp_ip4_entry_t * e;
266   u32 i;
267   u32 * to_add_del = 0;
268
269   pool_foreach (e, am->ip4_entry_pool, ({
270     if (e->key.sw_if_index == sw_if_index)
271         vec_add1 (to_add_del, e - am->ip4_entry_pool);
272   }));
273
274   for (i = 0; i < vec_len (to_add_del); i++)
275     {
276       ethernet_arp_ip4_over_ethernet_address_t arp_add;
277       e = pool_elt_at_index (am->ip4_entry_pool, to_add_del[i]);
278
279       clib_memcpy (&arp_add.ethernet, e->ethernet_address, 6);
280       arp_add.ip4.as_u32 = e->key.ip4_address.as_u32;
281
282       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
283         {
284           vnet_arp_set_ip4_over_ethernet (vnm,
285               e->key.sw_if_index, e->key.fib_index, &arp_add, 
286               e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC);
287         }
288       else if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) == 0)
289         {
290           vnet_arp_unset_ip4_over_ethernet (vnm,
291               e->key.sw_if_index, e->key.fib_index, &arp_add);
292         }
293     }
294
295   vec_free (to_add_del);
296   return 0;
297 }
298
299 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
300
301 static int
302 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
303                                          u32 sw_if_index,
304                                          u32 fib_index,
305                                          void * a_arg,
306                                          int is_static);
307
308 static int
309 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
310                                            u32 sw_if_index,
311                                            u32 fib_index,
312                                            void * a_arg);
313
314 typedef struct {
315   u32 sw_if_index;
316   u32 fib_index;
317   ethernet_arp_ip4_over_ethernet_address_t a;
318   int is_static;
319   int is_remove; /* set is_remove=1 to clear arp entry */
320 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
321
322 static void set_ip4_over_ethernet_rpc_callback 
323 ( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
324 {
325   vnet_main_t * vm = vnet_get_main();
326   ASSERT(os_get_cpu_number() == 0);
327
328   if (a->is_remove)
329     vnet_arp_unset_ip4_over_ethernet_internal(vm, 
330                                               a->sw_if_index, 
331                                               a->fib_index,
332                                               &(a->a));
333   else
334     vnet_arp_set_ip4_over_ethernet_internal (vm,
335                                              a->sw_if_index,
336                                              a->fib_index,
337                                              &(a->a),
338                                              a->is_static);
339 }
340
341 int
342 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
343                                 u32 sw_if_index,
344                                 u32 fib_index,
345                                 void * a_arg,
346                                 int is_static)
347 {
348   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
349   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
350
351   args.sw_if_index = sw_if_index;
352   args.fib_index = fib_index;
353   args.is_static = is_static;
354   args.is_remove = 0;
355   clib_memcpy (&args.a, a, sizeof (*a));
356
357   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
358                                (u8 *) &args, sizeof (args));
359   return 0;
360 }
361
362 int
363 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
364                                          u32 sw_if_index,
365                                          u32 fib_index,
366                                          void * a_arg,
367                                          int is_static)
368 {
369   ethernet_arp_ip4_key_t k;
370   ethernet_arp_ip4_entry_t * e = 0;
371   ethernet_arp_main_t * am = &ethernet_arp_main;
372   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
373   vlib_main_t * vm = vlib_get_main();
374   ip4_main_t * im = &ip4_main;
375   ip_lookup_main_t * lm = &im->lookup_main;
376   int make_new_arp_cache_entry=1;
377   uword * p;
378   ip4_add_del_route_args_t args;
379   ip_adjacency_t adj, * existing_adj;
380   pending_resolution_t * pr, * mc;
381   
382   u32 next_index;
383   u32 adj_index;
384
385   fib_index = (fib_index != (u32)~0) 
386     ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
387
388   k.sw_if_index = sw_if_index;
389   k.ip4_address = a->ip4;
390   k.fib_index = fib_index;
391
392   p = mhash_get (&am->ip4_entry_by_key, &k);
393   if (p)
394     {
395       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
396
397       /* Refuse to over-write static arp. */
398       if (!is_static &&
399           (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
400         return -2;
401       make_new_arp_cache_entry = 0;
402     }
403
404   /* Note: always install the route. It might have been deleted */
405   memset(&adj, 0, sizeof(adj));
406   adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
407   adj.n_adj = 1; /*  otherwise signature compare fails */
408
409   vnet_rewrite_for_sw_interface
410     (vnm,
411      VNET_L3_PACKET_TYPE_IP4,
412      sw_if_index,
413      ip4_rewrite_node.index,
414      a->ethernet,               /* destination address */
415      &adj.rewrite_header,
416      sizeof (adj.rewrite_data));
417
418   /* result of this lookup should be next-hop adjacency */
419   adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0);
420   existing_adj = ip_get_adjacency(lm, adj_index);
421
422   if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
423       existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
424     {
425       u32 * ai;
426       u32 * adjs = vec_dup(e->adjacencies);
427       /* Update all adj assigned to this arp entry */
428       vec_foreach(ai, adjs)
429         {
430           int i;
431           ip_adjacency_t * uadj = ip_get_adjacency(lm, *ai);
432           for (i = 0; i < uadj->n_adj; i++)
433             if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
434                 uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
435               ip_update_adjacency (lm, *ai + i, &adj);
436         }
437       vec_free(adjs);
438     }
439   else
440     {
441       /* Check that new adjacency actually isn't exactly the same as
442        *  what is already there. If we over-write the adjacency with
443        *  exactly the same info, its technically a new adjacency with
444        *  new counters, but to user it appears as counters reset.
445        */
446       if (vnet_ip_adjacency_share_compare (&adj, existing_adj) == 0) {
447         /* create new adj */
448         args.table_index_or_table_id = fib_index;
449         args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
450         args.dst_address = a->ip4;
451         args.dst_address_length = 32;
452         args.adj_index = ~0;
453         args.add_adj = &adj;
454         args.n_add_adj = 1;
455         ip4_add_del_route (im, &args);
456       }
457     }
458
459   if (make_new_arp_cache_entry)
460     {
461       pool_get (am->ip4_entry_pool, e);
462       mhash_set (&am->ip4_entry_by_key, &k,
463                  e - am->ip4_entry_pool,
464                  /* old value */ 0);
465       e->key = k;
466     }
467
468   /* Update time stamp and ethernet address. */
469   clib_memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address));
470   e->cpu_time_last_updated = clib_cpu_time_now ();
471   if (is_static)
472     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
473
474   /* Customer(s) waiting for this address to be resolved? */
475   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
476   if (p)
477     {
478       next_index = p[0];
479
480       while (next_index != (u32)~0)
481         {
482           pr = pool_elt_at_index (am->pending_resolutions, next_index);
483           vlib_process_signal_event (vm, pr->node_index,
484                                      pr->type_opaque, 
485                                      pr->data);
486           next_index = pr->next_index;
487           pool_put (am->pending_resolutions, pr);
488         }
489       
490       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
491     }
492
493   /* Customer(s) requesting ARP event for this address? */
494   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
495   if (p)
496     {
497       next_index = p[0];
498
499       while (next_index != (u32)~0)
500         {
501           int (*fp)(u32, u8 *, u32, u32);
502           int rv = 1;
503           mc = pool_elt_at_index (am->mac_changes, next_index);
504           fp = mc->data_callback;
505
506           /* Call the user's data callback, return 1 to suppress dup events */
507           if (fp)
508             rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0);
509           
510           /* 
511            * Signal the resolver process, as long as the user
512            * says they want to be notified
513            */
514           if (rv == 0)
515             vlib_process_signal_event (vm, mc->node_index,
516                                        mc->type_opaque, 
517                                        mc->data);
518           next_index = mc->next_index;
519         }
520     }
521
522   return 0;
523 }
524
525 void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, 
526                                              void * address_arg,
527                                              uword node_index,
528                                              uword type_opaque,
529                                              uword data)
530 {
531   ethernet_arp_main_t * am = &ethernet_arp_main;
532   ip4_address_t * address = address_arg;
533   uword * p;
534   pending_resolution_t * pr;
535   
536   pool_get (am->pending_resolutions, pr);
537
538   pr->next_index = ~0;
539   pr->node_index = node_index;
540   pr->type_opaque = type_opaque;
541   pr->data = data;
542   pr->data_callback = 0;
543
544   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
545   if (p)
546     {
547       /* Insert new resolution at the head of the list */
548       pr->next_index = p[0];
549       hash_unset (am->pending_resolutions_by_address, address->as_u32);
550     }
551   
552   hash_set (am->pending_resolutions_by_address, address->as_u32, 
553             pr - am->pending_resolutions);
554 }
555
556 int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, 
557                                        void * data_callback,
558                                        u32 pid,
559                                        void * address_arg,
560                                        uword node_index,
561                                        uword type_opaque,
562                                        uword data, int is_add)
563 {
564   ethernet_arp_main_t * am = &ethernet_arp_main;
565   ip4_address_t * address = address_arg;
566   uword * p;
567   pending_resolution_t * mc;
568   void (*fp)(u32, u8 *) = data_callback;
569   
570   if (is_add)
571     {
572       pool_get (am->mac_changes, mc);
573
574       mc->next_index = ~0;
575       mc->node_index = node_index;
576       mc->type_opaque = type_opaque;
577       mc->data = data;
578       mc->data_callback = data_callback;
579       mc->pid = pid;
580       
581       p = hash_get (am->mac_changes_by_address, address->as_u32);
582       if (p)
583         {
584           /* Insert new resolution at the head of the list */
585           mc->next_index = p[0];
586           hash_unset (am->mac_changes_by_address, address->as_u32);
587         }
588       
589       hash_set (am->mac_changes_by_address, address->as_u32, 
590                 mc - am->mac_changes);
591       return 0;
592     }
593   else
594     {
595       u32 index;
596       pending_resolution_t * mc_last = 0;
597
598       p = hash_get (am->mac_changes_by_address, address->as_u32);
599       if (p == 0)
600         return VNET_API_ERROR_NO_SUCH_ENTRY;
601
602       index = p[0];
603
604       while (index != (u32)~0)
605         {
606           mc = pool_elt_at_index (am->mac_changes, index);
607           if (mc->node_index == node_index &&
608               mc->type_opaque == type_opaque &&
609               mc->pid == pid)
610             {
611               /* Clients may need to clean up pool entries, too */
612               if (fp)
613                 (*fp)(mc->data, 0 /* no new mac addrs */);
614               if (index == p[0])
615                 {
616                   hash_unset (am->mac_changes_by_address, address->as_u32);
617                   if (mc->next_index != ~0)
618                     hash_set (am->mac_changes_by_address, address->as_u32,
619                               mc->next_index);
620                   pool_put (am->mac_changes, mc);
621                   return 0;
622                 }
623               else
624                 {
625                   ASSERT(mc_last);
626                   mc_last->next_index = mc->next_index;
627                   pool_put (am->mac_changes, mc);
628                   return 0;
629                 }
630             }
631           mc_last = mc;
632           index = mc->next_index;
633         }
634       
635       return VNET_API_ERROR_NO_SUCH_ENTRY;
636     }
637 }
638
/* Next-node dispositions of arp-input: a packet is either dropped or sent
   back out as a reply. */
typedef enum
{
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_NEXT_REPLY_TX,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
645
/* X-macro listing every arp-input counter as (symbol, description). */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address")        \
  _ (gratuitous_arp, "ARP probe or announcement dropped")

/* One ETHERNET_ARP_ERROR_<symbol> per list entry, in list order, followed
   by the entry count. */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
  ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
667
668 /* get first interface address */
669 ip4_address_t *
670 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
671                              ip_interface_address_t ** result_ia)
672 {
673   ip_lookup_main_t * lm = &im->lookup_main;
674   ip_interface_address_t * ia = 0;
675   ip4_address_t * result = 0;
676
677   foreach_ip_interface_address (lm, ia, sw_if_index, 
678                                 1 /* honor unnumbered */,
679   ({
680     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
681     result = a;
682     break;
683   }));
684   if (result_ia)
685     *result_ia = result ? ia : 0;
686   return result;
687 }
688
689 static void unset_random_arp_entry (void)
690 {
691   ethernet_arp_main_t * am = &ethernet_arp_main;
692   ethernet_arp_ip4_entry_t * e;
693   vnet_main_t * vnm = vnet_get_main();
694   ethernet_arp_ip4_over_ethernet_address_t delme;  
695   u32 index;
696
697   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
698   am->arp_delete_rotor = index;
699
700   /* Try again from elt 0, could happen if an intfc goes down */
701   if (index == ~0)
702     {
703       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
704       am->arp_delete_rotor = index;
705     }
706
707   /* Nothing left in the pool */
708   if (index == ~0)
709     return;
710
711   e = pool_elt_at_index (am->ip4_entry_pool, index);
712   
713   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
714   delme.ip4.as_u32 = e->key.ip4_address.as_u32;
715   
716   vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
717                                     e->key.fib_index, &delme);
718 }
719   
720 static void arp_unnumbered (vlib_buffer_t * p0, 
721                        u32 pi0,
722                        ethernet_header_t * eth0,
723                        ip_interface_address_t * ifa0)
724 {
725   vlib_main_t * vm = vlib_get_main();
726   vnet_main_t * vnm = vnet_get_main();
727   vnet_interface_main_t * vim = &vnm->interface_main;
728   vnet_sw_interface_t * si;
729   vnet_hw_interface_t * hi;
730   u32 unnum_src_sw_if_index;
731   u32 * broadcast_swifs = 0;
732   u32 * buffers = 0;
733   u32 n_alloc = 0;
734   vlib_buffer_t * b0;
735   int i;
736   u8 dst_mac_address[6];
737   i16 header_size;
738   ethernet_arp_header_t * arp0;
739
740   /* Save the dst mac address */
741   clib_memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
742
743   /* Figure out which sw_if_index supplied the address */
744   unnum_src_sw_if_index = ifa0->sw_if_index;
745
746   /* Track down all users of the unnumbered source */
747   pool_foreach (si, vim->sw_interfaces, 
748   ({
749     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
750         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
751       {
752         vec_add1 (broadcast_swifs, si->sw_if_index);
753       }
754   }));
755           
756
757   ASSERT (vec_len(broadcast_swifs));
758
759   /* Allocate buffering if we need it */
760   if (vec_len(broadcast_swifs) > 1)
761     {
762       vec_validate (buffers, vec_len(broadcast_swifs)-2);
763       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers));
764       _vec_len (buffers) = n_alloc;
765       for (i = 0; i < n_alloc; i++)
766         {
767           b0 = vlib_get_buffer (vm, buffers[i]);
768
769           /* xerox (partially built) ARP pkt */
770           clib_memcpy (b0->data, p0->data, p0->current_length + p0->current_data);
771           b0->current_data = p0->current_data;
772           b0->current_length = p0->current_length;
773           vnet_buffer(b0)->sw_if_index[VLIB_RX] =
774             vnet_buffer(p0)->sw_if_index[VLIB_RX];
775         }
776     }
777
778   vec_insert (buffers, 1, 0);
779   buffers[0] = pi0;
780   
781   for (i = 0; i < vec_len(buffers); i++)
782     {
783       b0 = vlib_get_buffer(vm, buffers[i]);
784       arp0 = vlib_buffer_get_current (b0);
785
786       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
787       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
788
789       /* For decoration, most likely */
790       vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
791
792       /* Fix ARP pkt src address */
793       clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
794
795       /* Build L2 encaps for this swif */
796       header_size = sizeof (ethernet_header_t);
797       if (si->sub.eth.flags.one_tag) 
798         header_size += 4;
799       else if (si->sub.eth.flags.two_tags)
800         header_size += 8;
801       
802       vlib_buffer_advance (b0, -header_size);
803       eth0 = vlib_buffer_get_current (b0);
804
805       if (si->sub.eth.flags.one_tag) {
806         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
807         
808         eth0->type = si->sub.eth.flags.dot1ad ?
809           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
810           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
811         outer->priority_cfi_and_id = 
812           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
813         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
814         
815       } else if (si->sub.eth.flags.two_tags) {
816         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
817         ethernet_vlan_header_t * inner = (void *) (outer + 1);
818         
819         eth0->type = si->sub.eth.flags.dot1ad ?
820           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
821           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
822         outer->priority_cfi_and_id = 
823           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
824         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
825         inner->priority_cfi_and_id = 
826           clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
827         inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
828         
829       } else {
830         eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
831       }
832       
833       /* Restore the original dst address, set src address */
834       clib_memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address));
835       clib_memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address));
836       
837       /* Transmit replicas */
838       if (i > 0)
839         {
840           vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
841           u32 * to_next = vlib_frame_vector_args (f);
842           to_next[0] = buffers[i];
843           f->n_vectors = 1;
844           vlib_put_frame_to_node (vm, hi->output_node_index, f);
845         }
846     }
847
848   /* The regular path outputs the original pkt.. */
849   vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
850
851   vec_free (broadcast_swifs);
852   vec_free (buffers);
853 }
854
855 static uword
856 arp_input (vlib_main_t * vm,
857            vlib_node_runtime_t * node,
858            vlib_frame_t * frame)
859 {
860   ethernet_arp_main_t * am = &ethernet_arp_main;
861   vnet_main_t * vnm = vnet_get_main();
862   ip4_main_t * im4 = &ip4_main;
863   u32 n_left_from, next_index, * from, * to_next;
864   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
865
866   from = vlib_frame_vector_args (frame);
867   n_left_from = frame->n_vectors;
868   next_index = node->cached_next_index;
869
870   if (node->flags & VLIB_NODE_FLAG_TRACE)
871     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
872                                    /* stride */ 1,
873                                    sizeof (ethernet_arp_input_trace_t));
874
875   while (n_left_from > 0)
876     {
877       u32 n_left_to_next;
878
879       vlib_get_next_frame (vm, node, next_index,
880                            to_next, n_left_to_next);
881
882       while (n_left_from > 0 && n_left_to_next > 0)
883         {
884           vlib_buffer_t * p0;
885           vnet_hw_interface_t * hw_if0;
886           ethernet_arp_header_t * arp0;
887           ethernet_header_t * eth0;
888           ip_interface_address_t * ifa0;
889           ip_adjacency_t * adj0;
890           ip4_address_t * if_addr0;
891           ip4_address_t proxy_src;
892           u32 pi0, error0, next0, sw_if_index0;
893           u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
894           ethernet_proxy_arp_t * pa;
895
896           pi0 = from[0];
897           to_next[0] = pi0;
898           from += 1;
899           to_next += 1;
900           n_left_from -= 1;
901           n_left_to_next -= 1;
902
903           p0 = vlib_get_buffer (vm, pi0);
904           arp0 = vlib_buffer_get_current (p0);
905
906           is_request0 = arp0->opcode 
907               == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
908
909           error0 = ETHERNET_ARP_ERROR_replies_sent;
910
911           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
912                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
913                     : error0);
914           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
915                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
916                     : error0);
917
918           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
919
920           if (error0)
921             goto drop1;
922
923           /* Check that IP address is local and matches incoming interface. */
924           if_addr0 = ip4_interface_address_matching_destination (im4,
925                                                                  &arp0->ip4_over_ethernet[1].ip4,
926                                                                  sw_if_index0,
927                                                                  &ifa0);
928           if (! if_addr0)
929             {
930               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
931               goto drop1;
932             }
933
934           /* Honor unnumbered interface, if any */
935           is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
936
937           /* Source must also be local to subnet of matching interface address. */
938           if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
939             {
940               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
941               goto drop1;
942             }
943
944           /* Reject requests/replies with our local interface address. */
945           src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
946           if (src_is_local0)
947             {
948               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
949               goto drop1;
950             }
951
952           dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
953
954           /* Fill in ethernet header. */
955           eth0 = ethernet_buffer_get_header (p0);
956
957           /* Trash ARP packets whose ARP-level source addresses do not
958              match their L2-frame-level source addresses */
959           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
960                       sizeof (eth0->src_address)))
961             {
962               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
963               goto drop2;
964             }
965
966           /* Learn or update sender's mapping only for requests or unicasts
967              that don't match local interface address. */
968           if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST
969               || is_request0)
970             {
971               if (am->limit_arp_cache_size && 
972                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
973                 unset_random_arp_entry();
974
975               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, 
976                                               (u32)~0 /* default fib */,
977                                               &arp0->ip4_over_ethernet[0], 
978                                               0 /* is_static */);
979               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
980              }
981
982           /* Only send a reply for requests sent which match a local interface. */
983           if (! (is_request0 && dst_is_local0))
984             {
985               error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)
986                         ? ETHERNET_ARP_ERROR_replies_received : error0);
987               goto drop1;
988             }
989
990           /* Send a reply. */
991         send_reply:
992           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
993           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
994
995           /* Send reply back through input interface */
996           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
997           next0 = ARP_INPUT_NEXT_REPLY_TX;
998
999           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1000
1001           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1002
1003           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6);
1004           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32;
1005
1006           /* Hardware must be ethernet-like. */
1007           ASSERT (vec_len (hw_if0->hw_address) == 6);
1008
1009           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1010           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
1011
1012           /* Figure out how much to rewind current data from adjacency. */
1013           if (ifa0)
1014             {
1015               adj0 = ip_get_adjacency (&ip4_main.lookup_main, 
1016                                        ifa0->neighbor_probe_adj_index);
1017               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1018                 {
1019                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1020                   goto drop2;
1021                 }
1022               if (is_unnum0)
1023                 arp_unnumbered (p0, pi0, eth0, ifa0);
1024               else
1025                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1026             }
1027
1028           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1029                                            n_left_to_next,pi0,next0);
1030
1031           n_replies_sent += 1;
1032           continue;
1033
1034         drop1:
1035           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1036               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1037                arp0->ip4_over_ethernet[1].ip4.as_u32))
1038             {
1039               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1040               goto drop2;
1041             }
1042           /* See if proxy arp is configured for the address */
1043           if (is_request0) 
1044             {
1045               vnet_sw_interface_t * si;
1046               u32 this_addr = clib_net_to_host_u32 
1047                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1048               u32 fib_index0;
1049
1050               si = vnet_get_sw_interface (vnm, sw_if_index0);
1051               
1052               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1053                 goto drop2;
1054
1055               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, 
1056                                     sw_if_index0);
1057
1058               vec_foreach (pa, am->proxy_arps)
1059                 {
1060                   u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1061                   u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1062
1063                    /* an ARP request hit in the proxy-arp table? */
1064                    if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1065                        (fib_index0 == pa->fib_index))
1066                     {
1067                       eth0 = ethernet_buffer_get_header (p0);
1068                       proxy_src.as_u32 = 
1069                         arp0->ip4_over_ethernet[1].ip4.data_u32;
1070
1071                       /* 
1072                        * Rewind buffer, direct code above not to
1073                        * think too hard about it. 
1074                        * $$$ is the answer ever anything other than
1075                        * vlib_buffer_reset(..)?
1076                        */
1077                       ifa0 = 0;
1078                       if_addr0 = &proxy_src;
1079                       vlib_buffer_reset (p0);
1080                       n_proxy_arp_replies_sent++;
1081                       goto send_reply;
1082                     }
1083                 }
1084             }
1085           
1086         drop2:
1087
1088           next0 = ARP_INPUT_NEXT_DROP;
1089           p0->error = node->errors[error0];
1090
1091           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1092                                            n_left_to_next,pi0,next0);
1093         }
1094
1095       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1096     }
1097
1098   vlib_error_count (vm, node->node_index,
1099                     ETHERNET_ARP_ERROR_replies_sent, 
1100                     n_replies_sent - n_proxy_arp_replies_sent);
1101   
1102   vlib_error_count (vm, node->node_index,
1103                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent, 
1104                     n_proxy_arp_replies_sent);
1105   return frame->n_vectors;
1106 }
1107
1108 static char * ethernet_arp_error_strings[] = {
1109 #define _(sym,string) string,
1110   foreach_ethernet_arp_error
1111 #undef _
1112 };
1113
1114 VLIB_REGISTER_NODE (arp_input_node,static) = {
1115   .function = arp_input,
1116   .name = "arp-input",
1117   .vector_size = sizeof (u32),
1118
1119   .n_errors = ETHERNET_ARP_N_ERROR,
1120   .error_strings = ethernet_arp_error_strings,
1121
1122   .n_next_nodes = ARP_INPUT_N_NEXT,
1123   .next_nodes = {
1124     [ARP_INPUT_NEXT_DROP] = "error-drop",
1125     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1126   },
1127
1128   .format_buffer = format_ethernet_arp_header,
1129   .format_trace = format_ethernet_arp_input_trace,
1130 };
1131
1132 static int
1133 ip4_arp_entry_sort (void *a1, void *a2)
1134 {
1135   ethernet_arp_ip4_entry_t * e1 = a1;
1136   ethernet_arp_ip4_entry_t * e2 = a2;
1137
1138   int cmp;
1139   vnet_main_t * vnm = vnet_get_main();
1140
1141   cmp = vnet_sw_interface_compare 
1142     (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
1143   if (! cmp)
1144     cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
1145   return cmp;
1146 }
1147
1148 static clib_error_t *
1149 show_ip4_arp (vlib_main_t * vm,
1150               unformat_input_t * input,
1151               vlib_cli_command_t * cmd)
1152 {
1153   vnet_main_t * vnm = vnet_get_main();
1154   ethernet_arp_main_t * am = &ethernet_arp_main;
1155   ethernet_arp_ip4_entry_t * e, * es;
1156   ethernet_proxy_arp_t * pa;
1157   clib_error_t * error = 0;
1158   u32 sw_if_index;
1159
1160   /* Filter entries by interface if given. */
1161   sw_if_index = ~0;
1162   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1163
1164   es = 0;
1165   pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); }));
1166   if ( es )
1167     {
1168       vec_sort_with_function (es, ip4_arp_entry_sort);
1169       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1170       vec_foreach (e, es) {
1171         if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
1172           continue;
1173         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1174       }
1175       vec_free (es);
1176     }
1177
1178   if (vec_len (am->proxy_arps))
1179     {
1180       vlib_cli_output (vm, "Proxy arps enabled for:");
1181       vec_foreach(pa, am->proxy_arps)
1182         {
1183           vlib_cli_output (vm, "Fib_index %d   %U - %U ", 
1184                            pa->fib_index,
1185                            format_ip4_address, &pa->lo_addr, 
1186                            format_ip4_address, &pa->hi_addr);
1187         }
1188     }
1189       
1190   return error;
1191 }
1192
1193 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1194   .path = "show ip arp",
1195   .function = show_ip4_arp,
1196   .short_help = "Show ARP table",
1197 };
1198
1199 typedef struct {
1200   pg_edit_t l2_type, l3_type;
1201   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1202   pg_edit_t opcode;
1203   struct {
1204     pg_edit_t ethernet;
1205     pg_edit_t ip4;
1206   } ip4_over_ethernet[2];
1207 } pg_ethernet_arp_header_t;
1208
1209 static inline void
1210 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1211 {
1212   /* Initialize fields that are not bit fields in the IP header. */
1213 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1214   _ (l2_type);
1215   _ (l3_type);
1216   _ (n_l2_address_bytes);
1217   _ (n_l3_address_bytes);
1218   _ (opcode);
1219   _ (ip4_over_ethernet[0].ethernet);
1220   _ (ip4_over_ethernet[0].ip4);
1221   _ (ip4_over_ethernet[1].ethernet);
1222   _ (ip4_over_ethernet[1].ip4);
1223 #undef _
1224 }
1225
1226 uword
1227 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1228 {
1229   pg_stream_t * s = va_arg (*args, pg_stream_t *);
1230   pg_ethernet_arp_header_t * p;
1231   u32 group_index;
1232   
1233   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1234                             &group_index);
1235   pg_ethernet_arp_header_init (p);
1236
1237   /* Defaults. */
1238   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1239   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1240   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1241   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1242
1243   if (! unformat (input, "%U: %U/%U -> %U/%U",
1244                   unformat_pg_edit,
1245                   unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1246                   unformat_pg_edit,
1247                   unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1248                   unformat_pg_edit,
1249                   unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1250                   unformat_pg_edit,
1251                   unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1252                   unformat_pg_edit,
1253                   unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1254     {
1255       /* Free up any edits we may have added. */
1256       pg_free_edit_group (s);
1257       return 0;
1258     }
1259   return 1;
1260 }
1261
1262 clib_error_t *ip4_set_arp_limit (u32 arp_limit)
1263 {
1264   ethernet_arp_main_t * am = &ethernet_arp_main;
1265
1266   am->limit_arp_cache_size = arp_limit;
1267   return 0;
1268 }
1269
1270 static void
1271 arp_ip4_entry_del_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1272 {
1273   int done = 0;
1274   int i;
1275
1276   while (!done)
1277     {
1278       vec_foreach_index(i, e->adjacencies)
1279         if (vec_elt(e->adjacencies, i) == adj_index)
1280           {
1281             vec_del1(e->adjacencies, i);
1282             continue;
1283           }
1284       done = 1;
1285     }
1286 }
1287
1288 static void
1289 arp_ip4_entry_add_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1290 {
1291   int i;
1292   vec_foreach_index(i, e->adjacencies)
1293     if (vec_elt(e->adjacencies, i) == adj_index)
1294       return;
1295   vec_add1(e->adjacencies, adj_index);
1296 }
1297
1298 static void
1299 arp_add_del_adj_cb (struct ip_lookup_main_t * lm,
1300                     u32 adj_index,
1301                     ip_adjacency_t * adj,
1302                     u32 is_del)
1303 {
1304   ethernet_arp_main_t * am = &ethernet_arp_main;
1305   ip4_main_t * im = &ip4_main;
1306   ethernet_arp_ip4_key_t k;
1307   ethernet_arp_ip4_entry_t * e = 0;
1308   uword * p;
1309   u32 ai;
1310
1311   for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++)
1312     {
1313       adj = ip_get_adjacency (lm, ai);
1314       if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && adj->arp.next_hop.ip4.as_u32)
1315         {
1316           k.sw_if_index = adj->rewrite_header.sw_if_index;
1317           k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32;
1318           k.fib_index = im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index];
1319           p = mhash_get (&am->ip4_entry_by_key, &k);
1320           if (p)
1321             e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1322         }
1323       else
1324         continue;
1325
1326       if (is_del)
1327         {
1328           if (!e)
1329             clib_warning("Adjacency contains unknown ARP next hop %U (del)",
1330                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1331           else
1332             arp_ip4_entry_del_adj(e, adj->heap_handle);
1333         }
1334       else /* add */
1335         {
1336           if (!e)
1337             clib_warning("Adjacency contains unknown ARP next hop %U (add)",
1338                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1339           else
1340             arp_ip4_entry_add_adj(e, adj->heap_handle);
1341         }
1342     }
1343 }
1344
1345 static clib_error_t * ethernet_arp_init (vlib_main_t * vm)
1346 {
1347   ethernet_arp_main_t * am = &ethernet_arp_main;
1348   pg_node_t * pn;
1349   clib_error_t * error;
1350   ip4_main_t * im = &ip4_main;
1351   ip_lookup_main_t * lm = &im->lookup_main;
1352
1353   if ((error = vlib_call_init_function (vm, ethernet_init)))
1354     return error;
1355
1356   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1357
1358   pn = pg_get_node (arp_input_node.index);
1359   pn->unformat_edit = unformat_pg_arp_header;
1360
1361   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1362 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1363   foreach_ethernet_arp_opcode;
1364 #undef _
1365
1366   mhash_init (&am->ip4_entry_by_key,
1367               /* value size */ sizeof (uword),
1368               /* key size */ sizeof (ethernet_arp_ip4_key_t));
1369
1370   /* $$$ configurable */
1371   am->limit_arp_cache_size = 50000;
1372
1373   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1374   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1375
1376   /* don't trace ARP error packets */
1377   {
1378     vlib_node_runtime_t *rt = 
1379       vlib_node_get_runtime (vm, arp_input_node.index);
1380
1381 #define _(a,b)                                  \
1382     vnet_pcap_drop_trace_filter_add_del         \
1383         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1384          1 /* is_add */);
1385     foreach_ethernet_arp_error
1386 #undef _
1387   }
1388
1389   ip_register_add_del_adjacency_callback(lm, arp_add_del_adj_cb);
1390
1391   return 0;
1392 }
1393
1394 VLIB_INIT_FUNCTION (ethernet_arp_init);
1395
1396 int 
1397 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1398                                   u32 sw_if_index, u32 fib_index,
1399                                   void * a_arg)
1400 {
1401   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1402   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1403
1404   args.sw_if_index = sw_if_index;
1405   args.fib_index = fib_index;
1406   args.is_remove = 1;
1407   clib_memcpy (&args.a, a, sizeof (*a));
1408
1409   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
1410                                (u8 *) &args, sizeof (args));
1411   return 0;
1412 }
1413
1414 static inline int 
1415 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1416                                            u32 sw_if_index, 
1417                                            u32 fib_index,
1418                                            void * a_arg)
1419 {
1420   ethernet_arp_ip4_entry_t * e;
1421   ethernet_arp_main_t * am = &ethernet_arp_main;
1422   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1423   ethernet_arp_ip4_key_t k;
1424   uword * p;
1425   ip4_add_del_route_args_t args;
1426   ip4_main_t * im = &ip4_main;
1427   ip_lookup_main_t * lm = &im->lookup_main;
1428   u32 adj_index;
1429   ip_adjacency_t * adj;
1430
1431   k.sw_if_index = sw_if_index;
1432   k.ip4_address = a->ip4;
1433   k.fib_index = fib_index;
1434   p = mhash_get (&am->ip4_entry_by_key, &k);
1435   if (! p)
1436     return -1;
1437
1438   memset(&args, 0, sizeof(args));
1439
1440   /* 
1441    * Make sure that the route actually exists before we try to delete it,
1442    * and make sure that it's a rewrite adjacency.
1443    *
1444    * If we point 1-N unnumbered interfaces at a loopback interface and 
1445    * shut down the loopback before shutting down 1-N unnumbered 
1446    * interfaces, the ARP cache will still have an entry, 
1447    * but the route will have disappeared.
1448    * 
1449    * See also ip4_del_interface_routes (...) 
1450    *            -> ip4_delete_matching_routes (...).
1451    */
1452   
1453   adj_index = ip4_fib_lookup_with_table 
1454       (im, fib_index, &a->ip4, 1 /* disable default route */);
1455
1456   /* Miss adj? Forget it... */
1457   if (adj_index != lm->miss_adj_index) {
1458       adj = ip_get_adjacency (lm, adj_index);
1459       /* 
1460        * Stupid control-plane trick:
1461        * admin down an interface (removes arp routes from fib),
1462        * bring the interface back up (does not reinstall them)
1463        * then remove the arp cache entry (yuck). When that happens,
1464        * the adj we find here will be the interface subnet ARP adj.
1465        */
1466       if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) {
1467           args.table_index_or_table_id = fib_index;
1468           args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL 
1469               | IP4_ROUTE_FLAG_NEIGHBOR;
1470           args.dst_address = a->ip4;
1471           args.dst_address_length = 32;
1472           ip4_add_del_route (im, &args);
1473           ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
1474       }
1475   }
1476
1477   e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1478   mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
1479   pool_put (am->ip4_entry_pool, e);
1480   return 0;
1481 }
1482
1483 static void 
1484 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a)
1485 {
1486   u8 old;
1487   int i;
1488
1489   for (i = 3; i >= 0; i--) 
1490     {
1491       old = a->ip4.as_u8[i];
1492       a->ip4.as_u8[i] += 1;
1493       if (old < a->ip4.as_u8[i])
1494         break;
1495     }
1496
1497   for (i = 5; i >= 0; i--)
1498     {
1499       old = a->ethernet[i];
1500       a->ethernet[i] += 1;
1501       if (old < a->ethernet[i])
1502         break;
1503     }
1504 }
1505
1506 int vnet_proxy_arp_add_del (ip4_address_t *lo_addr,
1507                             ip4_address_t *hi_addr,
1508                             u32 fib_index, int is_del)
1509 {
1510   ethernet_arp_main_t *am = &ethernet_arp_main;
1511   ethernet_proxy_arp_t *pa;
1512   u32 found_at_index = ~0;
1513
1514   vec_foreach (pa, am->proxy_arps)
1515     {
1516       if (pa->lo_addr == lo_addr->as_u32 
1517           && pa->hi_addr == hi_addr->as_u32
1518           && pa->fib_index == fib_index)
1519         {
1520           found_at_index = pa - am->proxy_arps;
1521           break;
1522         }
1523     }
1524
1525   if (found_at_index != ~0)
1526     {
1527       /* Delete, otherwise it's already in the table */
1528       if (is_del)
1529         vec_delete (am->proxy_arps, 1, found_at_index);
1530       return 0;
1531     }
1532   /* delete, no such entry */
1533   if (is_del)
1534     return VNET_API_ERROR_NO_SUCH_ENTRY;
1535
1536   /* add, not in table */
1537   vec_add2 (am->proxy_arps, pa, 1);
1538   pa->lo_addr = lo_addr->as_u32;
1539   pa->hi_addr = hi_addr->as_u32;
1540   pa->fib_index = fib_index;
1541   return 0;
1542 }
1543
1544 /*
1545  * Remove any proxy arp entries asdociated with the 
1546  * specificed fib.
1547  */
1548 int vnet_proxy_arp_fib_reset (u32 fib_id)
1549 {
1550   ip4_main_t * im = &ip4_main;
1551   ethernet_arp_main_t *am = &ethernet_arp_main;
1552   ethernet_proxy_arp_t *pa;
1553   u32 * entries_to_delete = 0;
1554   u32 fib_index;
1555   uword * p;
1556   int i;
1557
1558   p = hash_get (im->fib_index_by_table_id, fib_id);
1559   if (! p)
1560       return VNET_API_ERROR_NO_SUCH_ENTRY;
1561   fib_index = p[0];
1562
1563   vec_foreach (pa, am->proxy_arps)
1564     {
1565       if (pa->fib_index == fib_index)
1566         {
1567           vec_add1 (entries_to_delete, pa - am->proxy_arps);
1568         }
1569     }
1570
1571   for (i = 0; i < vec_len(entries_to_delete); i++)
1572     {
1573        vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1574     } 
1575
1576   vec_free (entries_to_delete);
1577
1578    return 0;
1579 }
1580
1581 u32
1582 vnet_arp_glean_add(u32 fib_index, void * next_hop_arg)
1583 {
1584   ethernet_arp_main_t * am = &ethernet_arp_main;
1585   ip4_main_t * im = &ip4_main;
1586   ip_lookup_main_t * lm = &im->lookup_main;
1587   ip4_address_t * next_hop = next_hop_arg;
1588   ip_adjacency_t add_adj, *adj;
1589   ip4_add_del_route_args_t args;
1590   ethernet_arp_ip4_entry_t * e;
1591   ethernet_arp_ip4_key_t k;
1592   u32 adj_index;
1593
1594   adj_index = ip4_fib_lookup_with_table(im, fib_index, next_hop, 0);
1595   adj = ip_get_adjacency(lm, adj_index);
1596
1597   if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1598     return ~0;
1599
1600   if (adj->arp.next_hop.ip4.as_u32 != 0)
1601     return adj_index;
1602
1603   k.sw_if_index = adj->rewrite_header.sw_if_index;
1604   k.fib_index = fib_index;
1605   k.ip4_address.as_u32 = next_hop->as_u32;
1606
1607   if (mhash_get (&am->ip4_entry_by_key, &k))
1608     return adj_index;
1609
1610   pool_get (am->ip4_entry_pool, e);
1611   mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, /* old value */ 0);
1612   e->key = k;
1613   e->cpu_time_last_updated = clib_cpu_time_now ();
1614   e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN;
1615
1616   memset(&args, 0, sizeof(args));
1617   clib_memcpy(&add_adj, adj, sizeof(add_adj));
1618   ip46_address_set_ip4(&add_adj.arp.next_hop, next_hop); /* install neighbor /32 route */
1619   args.table_index_or_table_id = fib_index;
1620   args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD| IP4_ROUTE_FLAG_NEIGHBOR;
1621   args.dst_address.as_u32 = next_hop->as_u32;
1622   args.dst_address_length = 32;
1623   args.adj_index = ~0;
1624   args.add_adj = &add_adj;
1625   args.n_add_adj = 1;
1626   ip4_add_del_route (im, &args);
1627   return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
1628 }
1629
1630 static clib_error_t *
1631 ip_arp_add_del_command_fn (vlib_main_t * vm,
1632                  unformat_input_t * input,
1633                  vlib_cli_command_t * cmd)
1634 {
1635   vnet_main_t * vnm = vnet_get_main();
1636   u32 sw_if_index;
1637   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1638   int addr_valid = 0;
1639   int is_del = 0;
1640   int count = 1;
1641   u32 fib_index = 0;
1642   u32 fib_id;
1643   int is_static = 0;
1644   int is_proxy = 0;
1645
1646   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1647     {
1648       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1649       if (unformat (input, "%U %U %U",
1650                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1651                     unformat_ip4_address, &addr.ip4, 
1652                     unformat_ethernet_address, &addr.ethernet))
1653         addr_valid = 1;
1654
1655       else if (unformat (input, "delete") || unformat (input, "del"))
1656         is_del = 1;
1657
1658       else if (unformat (input, "static"))
1659         is_static = 1;
1660
1661       else if (unformat (input, "count %d", &count))
1662         ;
1663
1664       else if (unformat (input, "fib-id %d", &fib_id))
1665         {
1666           ip4_main_t * im = &ip4_main;
1667           uword * p = hash_get (im->fib_index_by_table_id, fib_id);
1668           if (! p)
1669             return clib_error_return (0, "fib ID %d doesn't exist\n",
1670                                       fib_id);
1671           fib_index = p[0];
1672         }
1673
1674       else if (unformat (input, "proxy %U - %U", 
1675                          unformat_ip4_address, &lo_addr.ip4, 
1676                          unformat_ip4_address, &hi_addr.ip4))
1677         is_proxy = 1;
1678       else
1679         break;
1680     }
1681   
1682   if (is_proxy)
1683     {
1684       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, 
1685                                      fib_index, is_del);
1686       return 0;
1687     }
1688
1689   if (addr_valid) 
1690     {
1691       int i;
1692
1693       for (i = 0; i < count; i++) 
1694         {
1695           if (is_del == 0) 
1696             {
1697               uword event_type, * event_data = 0;
1698
1699               /* Park the debug CLI until the arp entry is installed */
1700               vnet_register_ip4_arp_resolution_event 
1701                 (vnm, &addr.ip4, vlib_current_process(vm),
1702                  1 /* type */, 0 /* data */);
1703               
1704               vnet_arp_set_ip4_over_ethernet
1705                 (vnm, sw_if_index, fib_index, &addr, is_static);
1706               
1707               vlib_process_wait_for_event (vm);
1708               event_type = vlib_process_get_events (vm, &event_data);
1709               vec_reset_length(event_data);
1710               if (event_type != 1)
1711                 clib_warning ("event type %d unexpected", event_type);
1712             }
1713           else
1714             vnet_arp_unset_ip4_over_ethernet
1715                 (vnm, sw_if_index, fib_index, &addr);
1716
1717           increment_ip4_and_mac_address (&addr);
1718         }
1719     }
1720   else
1721     {
1722       return clib_error_return (0, "unknown input `%U'",
1723                                 format_unformat_error, input);
1724     }
1725   
1726   return 0;
1727 }
1728
1729 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1730     .path = "set ip arp",
1731     .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1732     .function = ip_arp_add_del_command_fn,
1733 };
1734
1735 static clib_error_t *
1736 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1737                               unformat_input_t * input,
1738                               vlib_cli_command_t * cmd)
1739 {
1740   vnet_main_t * vnm = vnet_get_main();
1741   u32 sw_if_index;
1742   vnet_sw_interface_t * si;
1743   int enable = 0;
1744   int intfc_set = 0;
1745
1746   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1747     {
1748       if (unformat (input, "%U", unformat_vnet_sw_interface, 
1749                     vnm, &sw_if_index))
1750         intfc_set = 1;
1751       else if (unformat (input, "enable") || unformat (input, "on"))
1752         enable = 1;
1753       else if (unformat (input, "disable") || unformat (input, "off"))
1754         enable = 0;
1755       else
1756         break;
1757     }
1758
1759   if (intfc_set == 0)
1760     return clib_error_return (0, "unknown input '%U'",
1761                               format_unformat_error, input);
1762
1763   si = vnet_get_sw_interface (vnm, sw_if_index);
1764   ASSERT(si);
1765   if (enable)
1766     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1767   else 
1768     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1769   
1770   return 0;
1771 }
1772
1773 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
1774     .path = "set interface proxy-arp",
1775     .short_help = "set interface proxy-arp <intfc> [enable|disable]",
1776     .function = set_int_proxy_arp_command_fn,
1777 };
1778
1779
1780 /*
1781  * ARP Termination in a L2 Bridge Domain based on an
1782  * IP4 to MAC hash table mac_by_ip4 for each BD.
1783  */
1784 typedef enum {
1785   ARP_TERM_NEXT_L2_OUTPUT,
1786   ARP_TERM_NEXT_DROP,
1787   ARP_TERM_N_NEXT,
1788 } arp_term_next_t;
1789
1790 u32 arp_term_next_node_index[32];
1791
1792 static uword
1793 arp_term_l2bd (vlib_main_t * vm,
1794                vlib_node_runtime_t * node,
1795                vlib_frame_t * frame)
1796 {
1797   l2input_main_t * l2im = &l2input_main;
1798   u32 n_left_from, next_index, * from, * to_next;
1799   u32 n_replies_sent = 0;
1800   u16 last_bd_index = ~0;
1801   l2_bridge_domain_t * last_bd_config = 0;
1802   l2_input_config_t * cfg0;
1803
1804   from = vlib_frame_vector_args (frame);
1805   n_left_from = frame->n_vectors;
1806   next_index = node->cached_next_index;
1807
1808   while (n_left_from > 0)
1809     {
1810       u32 n_left_to_next;
1811
1812       vlib_get_next_frame (vm, node, next_index,
1813                            to_next, n_left_to_next);
1814
1815       while (n_left_from > 0 && n_left_to_next > 0)
1816         {
1817           vlib_buffer_t * p0;
1818           ethernet_header_t * eth0;
1819           ethernet_arp_header_t * arp0;
1820           u8 * l3h0;
1821           u32 pi0, error0, next0, sw_if_index0;
1822           u16 ethertype0;
1823           u16 bd_index0;
1824           u32 ip0;
1825           u8 * macp0;
1826
1827           pi0 = from[0];
1828           to_next[0] = pi0;
1829           from += 1;
1830           to_next += 1;
1831           n_left_from -= 1;
1832           n_left_to_next -= 1;
1833
1834           p0 = vlib_get_buffer (vm, pi0);
1835           eth0 = vlib_buffer_get_current (p0);
1836           l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len;
1837           ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2));
1838           arp0 = (ethernet_arp_header_t *) l3h0;
1839
1840           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
1841                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
1842             {
1843               u8 *t0 = vlib_add_trace (
1844                   vm, node, p0, sizeof(ethernet_arp_input_trace_t));
1845               clib_memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t));
1846             }
1847
1848           if (PREDICT_FALSE  (
1849             (ethertype0 != ETHERNET_TYPE_ARP) ||
1850             (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request))))
1851             goto next_l2_feature;
1852
1853           error0 = ETHERNET_ARP_ERROR_replies_sent;
1854           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
1855                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
1856                     : error0);
1857           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
1858                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
1859                     : error0);
1860
1861           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1862
1863           if (error0)
1864             goto drop;
1865
1866           // Trash ARP packets whose ARP-level source addresses do not
1867           // match their L2-frame-level source addresses */
1868           if (PREDICT_FALSE (
1869             memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1870                     sizeof (eth0->src_address))))
1871             {
1872               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1873               goto drop;
1874             }
1875
1876           // Check if anyone want ARP request events for L2 BDs
1877           {
1878           pending_resolution_t * mc;
1879           ethernet_arp_main_t * am = &ethernet_arp_main;
1880           uword *p = hash_get (am->mac_changes_by_address, 0);
1881           if (p && (vnet_buffer(p0)->l2.shg == 0))
1882             { // Only SHG 0 interface which is more likely local
1883               u32 next_index = p[0];
1884               while (next_index != (u32)~0)
1885                 {
1886                   int (*fp)(u32, u8 *, u32, u32);
1887                   int rv = 1;
1888                   mc = pool_elt_at_index (am->mac_changes, next_index);
1889                   fp = mc->data_callback;
1890                   // Call the callback, return 1 to suppress dup events */
1891                   if (fp) rv = (*fp)(mc->data, 
1892                                      arp0->ip4_over_ethernet[0].ethernet, 
1893                                      sw_if_index0, 
1894                                      arp0->ip4_over_ethernet[0].ip4.as_u32);
1895                   // Signal the resolver process
1896                   if (rv == 0)
1897                     vlib_process_signal_event (vm, mc->node_index,
1898                                                mc->type_opaque, 
1899                                                mc->data);
1900                   next_index = mc->next_index;
1901                 }
1902             }
1903           }
1904
1905           // lookup BD mac_by_ip4 hash table for MAC entry
1906           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
1907           bd_index0 = vnet_buffer(p0)->l2.bd_index;
1908           if (PREDICT_FALSE (
1909             (bd_index0 != last_bd_index) || (last_bd_index == (u16) ~0)))
1910             {
1911               last_bd_index = bd_index0;
1912               last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0);
1913             }
1914           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
1915
1916           if (PREDICT_FALSE(!macp0)) 
1917               goto next_l2_feature;     // MAC not found 
1918
1919           // MAC found, send ARP reply -
1920           // Convert ARP request packet to ARP reply
1921           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1922           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1923           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
1924           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
1925           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1926           clib_memcpy (eth0->src_address, macp0, 6);
1927           n_replies_sent += 1;
1928
1929           // For BVI, need to use l2-fwd node to send ARP reply as 
1930           // l2-output node cannot output packet to BVI properly
1931           cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0);
1932           if (PREDICT_FALSE (cfg0->bvi))
1933             {
1934               vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
1935               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
1936               goto next_l2_feature;
1937             }
1938
1939           // Send ARP reply back out input interface through l2-output
1940           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1941           next0 = ARP_TERM_NEXT_L2_OUTPUT;
1942           // Note that output to VXLAN tunnel will fail due to SHG which
1943           // is probably desireable since ARP termination is not intended
1944           // for ARP requests from other hosts. If output to VXLAN tunnel is
1945           // required, however, can just clear the SHG in packet as follows:
1946           //   vnet_buffer(p0)->l2.shg = 0;
1947
1948           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1949                                            n_left_to_next,pi0,next0);
1950           continue;
1951
1952         next_l2_feature:
1953           {
1954             u32 feature_bitmap0 =
1955                 vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
1956             vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0;
1957             next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index,
1958                                                     feature_bitmap0);
1959             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1960                                              n_left_to_next,pi0,next0);
1961             continue;
1962           }
1963
1964         drop:
1965           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1966               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1967                arp0->ip4_over_ethernet[1].ip4.as_u32))
1968             {
1969               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1970             }
1971           next0 = ARP_TERM_NEXT_DROP;
1972           p0->error = node->errors[error0];
1973
1974           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1975                                            n_left_to_next,pi0,next0);
1976         }
1977
1978       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1979     }
1980
1981   vlib_error_count (vm, node->node_index,
1982                     ETHERNET_ARP_ERROR_replies_sent, 
1983                     n_replies_sent);
1984   return frame->n_vectors;
1985 }
1986
1987 VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = {
1988   .function = arp_term_l2bd,
1989   .name = "arp-term-l2bd",
1990   .vector_size = sizeof (u32),
1991
1992   .n_errors = ETHERNET_ARP_N_ERROR,
1993   .error_strings = ethernet_arp_error_strings,
1994
1995   .n_next_nodes = ARP_TERM_N_NEXT,
1996   .next_nodes = {
1997     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
1998     [ARP_TERM_NEXT_DROP] = "error-drop",
1999   },
2000
2001   .format_buffer = format_ethernet_arp_header,
2002   .format_trace = format_ethernet_arp_input_trace,
2003 };
2004
2005 clib_error_t *arp_term_init (vlib_main_t *vm)
2006 { // Initialize the feature next-node indexes 
2007   feat_bitmap_init_next_nodes(vm,
2008                               arp_term_l2bd_node.index,
2009                               L2INPUT_N_FEAT,
2010                               l2input_get_feat_names(),
2011                               arp_term_next_node_index);
2012   return 0;
2013 }
2014
2015 VLIB_INIT_FUNCTION (arp_term_init);