Explicitly specify IP address type for format_ip46_address function
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ethernet/ethernet.h>
20 #include <vnet/ethernet/arp_packet.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vppinfra/mhash.h>
23
24 void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
25
26 typedef struct {
27   u32 sw_if_index;
28   u32 fib_index;
29   ip4_address_t ip4_address;
30 } ethernet_arp_ip4_key_t;
31
32 typedef struct {
33   ethernet_arp_ip4_key_t key;
34   u8 ethernet_address[6];
35
36   u16 flags;
37 #define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
38 #define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN  (2 << 0)
39
40   u64 cpu_time_last_updated;
41
42   u32 * adjacencies;
43 } ethernet_arp_ip4_entry_t;
44
45 typedef struct {
46   u32 lo_addr;
47   u32 hi_addr;
48   u32 fib_index;
49 } ethernet_proxy_arp_t;
50
51 typedef struct {
52   u32 next_index;
53   uword node_index;
54   uword type_opaque;
55   uword data;
56   /* Used for arp event notification only */
57   void * data_callback;
58   u32 pid;
59 } pending_resolution_t;
60
61 typedef struct {
62   /* Hash tables mapping name to opcode. */
63   uword * opcode_by_name;
64
65   /* lite beer "glean" adjacency handling */
66   uword * pending_resolutions_by_address;
67   pending_resolution_t * pending_resolutions;
68
69   /* Mac address change notification */
70   uword * mac_changes_by_address;
71   pending_resolution_t * mac_changes;
72
73   ethernet_arp_ip4_entry_t * ip4_entry_pool;
74
75   mhash_t ip4_entry_by_key;
76     
77   /* ARP attack mitigation */
78   u32 arp_delete_rotor;
79   u32 limit_arp_cache_size;
80
81   /* Proxy arp vector */
82   ethernet_proxy_arp_t * proxy_arps;
83 } ethernet_arp_main_t;
84
85 static ethernet_arp_main_t ethernet_arp_main;
86
87 static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va)
88 {
89   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
90   char * t = 0;
91   switch (h)
92     {
93 #define _(n,f) case n: t = #f; break;
94       foreach_ethernet_arp_hardware_type;
95 #undef _
96
97     default:
98       return format (s, "unknown 0x%x", h);
99     }
100
101   return format (s, "%s", t);
102 }
103
104 static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va)
105 {
106   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
107   char * t = 0;
108   switch (o)
109     {
110 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
111       foreach_ethernet_arp_opcode;
112 #undef _
113
114     default:
115       return format (s, "unknown 0x%x", o);
116     }
117
118   return format (s, "%s", t);
119 }
120
121 static uword
122 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
123                                               va_list * args)
124 {
125   int * result = va_arg (*args, int *);
126   ethernet_arp_main_t * am = &ethernet_arp_main;
127   int x, i;
128
129   /* Numeric opcode. */
130   if (unformat (input, "0x%x", &x)
131       || unformat (input, "%d", &x))
132     {
133       if (x >= (1 << 16))
134         return 0;
135       *result = x;
136       return 1;
137     }
138
139   /* Named type. */
140   if (unformat_user (input, unformat_vlib_number_by_name,
141                      am->opcode_by_name, &i))
142     {
143       *result = i;
144       return 1;
145     }
146
147   return 0;
148 }
149
150 static uword
151 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
152                                              va_list * args)
153 {
154   int * result = va_arg (*args, int *);
155   if (! unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result))
156     return 0;
157
158   *result = clib_host_to_net_u16 ((u16) *result);
159   return 1;
160 }
161
162 static u8 * format_ethernet_arp_header (u8 * s, va_list * va)
163 {
164   ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *);
165   u32 max_header_bytes = va_arg (*va, u32);
166   uword indent;
167   u16 l2_type, l3_type;
168
169   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
170     return format (s, "ARP header truncated");
171
172   l2_type = clib_net_to_host_u16 (a->l2_type);
173   l3_type = clib_net_to_host_u16 (a->l3_type);
174
175   indent = format_get_indent (s);
176
177   s = format (s, "%U, type %U/%U, address size %d/%d",
178               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
179               format_ethernet_arp_hardware_type, l2_type,
180               format_ethernet_type, l3_type,
181               a->n_l2_address_bytes, a->n_l3_address_bytes);
182               
183   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
184       && l3_type == ETHERNET_TYPE_IP4)
185     {
186       s = format (s, "\n%U%U/%U -> %U/%U",
187                   format_white_space, indent,
188                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
189                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
190                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
191                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
192     }
193   else
194     {
195       uword n2 = a->n_l2_address_bytes;
196       uword n3 = a->n_l3_address_bytes;
197       s = format (s, "\n%U%U/%U -> %U/%U",
198                   format_white_space, indent,
199                   format_hex_bytes, a->data + 0*n2 + 0*n3, n2,
200                   format_hex_bytes, a->data + 1*n2 + 0*n3, n3,
201                   format_hex_bytes, a->data + 1*n2 + 1*n3, n2,
202                   format_hex_bytes, a->data + 2*n2 + 1*n3, n3);
203     }
204
205   return s;
206 }
207
208 static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
209 {
210   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
211   ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *);
212   vnet_sw_interface_t * si;
213   ip4_fib_t * fib;
214   u8 * flags = 0;
215
216   if (! e)
217     return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4",
218                    "Flags", "Ethernet", "Interface");
219
220   fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
221                                            IP4_ROUTE_FLAG_FIB_INDEX);
222   si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
223
224   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN)
225     flags = format(flags, "G");
226
227   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
228     flags = format(flags, "S");
229
230   s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U",
231               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
232               fib->table_id,
233               format_ip4_address, &e->key.ip4_address,
234               flags ? (char *) flags : "",
235               format_ethernet_address, e->ethernet_address,
236               format_vnet_sw_interface_name, vnm, si);
237
238   vec_free(flags);
239   return s;
240 }
241
242 typedef struct {
243   u8 packet_data[64];
244 } ethernet_arp_input_trace_t;
245
246 static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va)
247 {
248   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
249   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
250   ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *);
251
252   s = format (s, "%U",
253               format_ethernet_arp_header,
254               t->packet_data, sizeof (t->packet_data));
255
256   return s;
257 }
258
259 clib_error_t *
260 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
261                                    u32 sw_if_index,
262                                    u32 flags)
263 {
264   ethernet_arp_main_t * am = &ethernet_arp_main;
265   ethernet_arp_ip4_entry_t * e;
266
267   if (! (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
268     {
269       u32 i, * to_delete = 0;
270
271       pool_foreach (e, am->ip4_entry_pool, ({
272         if (e->key.sw_if_index == sw_if_index)
273           vec_add1 (to_delete, e - am->ip4_entry_pool);
274       }));
275
276       for (i = 0; i < vec_len (to_delete); i++)
277         {
278           ethernet_arp_ip4_over_ethernet_address_t delme;
279           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
280
281           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
282           delme.ip4.as_u32 = e->key.ip4_address.as_u32;
283
284           vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
285                                             e->key.fib_index, &delme);
286         }
287
288       vec_free (to_delete);
289     }
290
291   return 0;
292 }
293
294 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
295
296 static int
297 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
298                                          u32 sw_if_index,
299                                          u32 fib_index,
300                                          void * a_arg,
301                                          int is_static);
302
303 static int
304 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
305                                            u32 sw_if_index,
306                                            u32 fib_index,
307                                            void * a_arg);
308
309 typedef struct {
310   u32 sw_if_index;
311   u32 fib_index;
312   ethernet_arp_ip4_over_ethernet_address_t a;
313   int is_static;
314   int is_remove; /* set is_remove=1 to clear arp entry */
315 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
316
317 static void set_ip4_over_ethernet_rpc_callback 
318 ( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
319 {
320   vnet_main_t * vm = vnet_get_main();
321   ASSERT(os_get_cpu_number() == 0);
322
323   if (a->is_remove)
324     vnet_arp_unset_ip4_over_ethernet_internal(vm, 
325                                               a->sw_if_index, 
326                                               a->fib_index,
327                                               &(a->a));
328   else
329     vnet_arp_set_ip4_over_ethernet_internal (vm,
330                                              a->sw_if_index,
331                                              a->fib_index,
332                                              &(a->a),
333                                              a->is_static);
334 }
335
336 int
337 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
338                                 u32 sw_if_index,
339                                 u32 fib_index,
340                                 void * a_arg,
341                                 int is_static)
342 {
343   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
344   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
345
346   args.sw_if_index = sw_if_index;
347   args.fib_index = fib_index;
348   args.is_static = is_static;
349   args.is_remove = 0;
350   clib_memcpy (&args.a, a, sizeof (*a));
351
352   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
353                                (u8 *) &args, sizeof (args));
354   return 0;
355 }
356
357 int
358 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
359                                          u32 sw_if_index,
360                                          u32 fib_index,
361                                          void * a_arg,
362                                          int is_static)
363 {
364   ethernet_arp_ip4_key_t k;
365   ethernet_arp_ip4_entry_t * e = 0;
366   ethernet_arp_main_t * am = &ethernet_arp_main;
367   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
368   vlib_main_t * vm = vlib_get_main();
369   ip4_main_t * im = &ip4_main;
370   ip_lookup_main_t * lm = &im->lookup_main;
371   int make_new_arp_cache_entry=1;
372   uword * p;
373   ip4_add_del_route_args_t args;
374   ip_adjacency_t adj, * existing_adj;
375   pending_resolution_t * pr, * mc;
376   
377   u32 next_index;
378   u32 adj_index;
379
380   fib_index = (fib_index != (u32)~0) 
381     ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
382
383   k.sw_if_index = sw_if_index;
384   k.ip4_address = a->ip4;
385   k.fib_index = fib_index;
386
387   p = mhash_get (&am->ip4_entry_by_key, &k);
388   if (p)
389     {
390       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
391
392       /* Refuse to over-write static arp. */
393       if (!is_static &&
394           (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
395         return -2;
396       make_new_arp_cache_entry = 0;
397     }
398
399   /* Note: always install the route. It might have been deleted */
400   memset(&adj, 0, sizeof(adj));
401   adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
402
403   vnet_rewrite_for_sw_interface
404     (vnm,
405      VNET_L3_PACKET_TYPE_IP4,
406      sw_if_index,
407      ip4_rewrite_node.index,
408      a->ethernet,               /* destination address */
409      &adj.rewrite_header,
410      sizeof (adj.rewrite_data));
411
412   /* result of this lookup should be next-hop adjacency */
413   adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0);
414   existing_adj = ip_get_adjacency(lm, adj_index);
415
416   if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
417       existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
418     {
419       u32 * ai;
420       u32 * adjs = vec_dup(e->adjacencies);
421       /* Update all adj assigned to this arp entry */
422       vec_foreach(ai, adjs)
423         {
424           int i;
425           ip_adjacency_t * uadj = ip_get_adjacency(lm, *ai);
426           for (i = 0; i < uadj->n_adj; i++)
427             if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
428                 uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
429               ip_update_adjacency (lm, *ai + i, &adj);
430         }
431       vec_free(adjs);
432     }
433   else
434     {
435       /* create new adj */
436       args.table_index_or_table_id = fib_index;
437       args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
438       args.dst_address = a->ip4;
439       args.dst_address_length = 32;
440       args.adj_index = ~0;
441       args.add_adj = &adj;
442       args.n_add_adj = 1;
443       ip4_add_del_route (im, &args);
444     }
445
446   if (make_new_arp_cache_entry)
447     {
448       pool_get (am->ip4_entry_pool, e);
449       mhash_set (&am->ip4_entry_by_key, &k,
450                  e - am->ip4_entry_pool,
451                  /* old value */ 0);
452       e->key = k;
453     }
454
455   /* Update time stamp and ethernet address. */
456   clib_memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address));
457   e->cpu_time_last_updated = clib_cpu_time_now ();
458   if (is_static)
459     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
460
461   /* Customer(s) waiting for this address to be resolved? */
462   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
463   if (p)
464     {
465       next_index = p[0];
466
467       while (next_index != (u32)~0)
468         {
469           pr = pool_elt_at_index (am->pending_resolutions, next_index);
470           vlib_process_signal_event (vm, pr->node_index,
471                                      pr->type_opaque, 
472                                      pr->data);
473           next_index = pr->next_index;
474           pool_put (am->pending_resolutions, pr);
475         }
476       
477       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
478     }
479
480   /* Customer(s) requesting ARP event for this address? */
481   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
482   if (p)
483     {
484       next_index = p[0];
485
486       while (next_index != (u32)~0)
487         {
488           int (*fp)(u32, u8 *, u32, u32);
489           int rv = 1;
490           mc = pool_elt_at_index (am->mac_changes, next_index);
491           fp = mc->data_callback;
492
493           /* Call the user's data callback, return 1 to suppress dup events */
494           if (fp)
495             rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0);
496           
497           /* 
498            * Signal the resolver process, as long as the user
499            * says they want to be notified
500            */
501           if (rv == 0)
502             vlib_process_signal_event (vm, mc->node_index,
503                                        mc->type_opaque, 
504                                        mc->data);
505           next_index = mc->next_index;
506         }
507     }
508
509   return 0;
510 }
511
512 void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, 
513                                              void * address_arg,
514                                              uword node_index,
515                                              uword type_opaque,
516                                              uword data)
517 {
518   ethernet_arp_main_t * am = &ethernet_arp_main;
519   ip4_address_t * address = address_arg;
520   uword * p;
521   pending_resolution_t * pr;
522   
523   pool_get (am->pending_resolutions, pr);
524
525   pr->next_index = ~0;
526   pr->node_index = node_index;
527   pr->type_opaque = type_opaque;
528   pr->data = data;
529   pr->data_callback = 0;
530
531   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
532   if (p)
533     {
534       /* Insert new resolution at the head of the list */
535       pr->next_index = p[0];
536       hash_unset (am->pending_resolutions_by_address, address->as_u32);
537     }
538   
539   hash_set (am->pending_resolutions_by_address, address->as_u32, 
540             pr - am->pending_resolutions);
541 }
542
543 int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, 
544                                        void * data_callback,
545                                        u32 pid,
546                                        void * address_arg,
547                                        uword node_index,
548                                        uword type_opaque,
549                                        uword data, int is_add)
550 {
551   ethernet_arp_main_t * am = &ethernet_arp_main;
552   ip4_address_t * address = address_arg;
553   uword * p;
554   pending_resolution_t * mc;
555   void (*fp)(u32, u8 *) = data_callback;
556   
557   if (is_add)
558     {
559       pool_get (am->mac_changes, mc);
560
561       mc->next_index = ~0;
562       mc->node_index = node_index;
563       mc->type_opaque = type_opaque;
564       mc->data = data;
565       mc->data_callback = data_callback;
566       mc->pid = pid;
567       
568       p = hash_get (am->mac_changes_by_address, address->as_u32);
569       if (p)
570         {
571           /* Insert new resolution at the head of the list */
572           mc->next_index = p[0];
573           hash_unset (am->mac_changes_by_address, address->as_u32);
574         }
575       
576       hash_set (am->mac_changes_by_address, address->as_u32, 
577                 mc - am->mac_changes);
578       return 0;
579     }
580   else
581     {
582       u32 index;
583       pending_resolution_t * mc_last = 0;
584
585       p = hash_get (am->mac_changes_by_address, address->as_u32);
586       if (p == 0)
587         return VNET_API_ERROR_NO_SUCH_ENTRY;
588
589       index = p[0];
590
591       while (index != (u32)~0)
592         {
593           mc = pool_elt_at_index (am->mac_changes, index);
594           if (mc->node_index == node_index &&
595               mc->type_opaque == type_opaque &&
596               mc->pid == pid)
597             {
598               /* Clients may need to clean up pool entries, too */
599               if (fp)
600                 (*fp)(mc->data, 0 /* no new mac addrs */);
601               if (index == p[0])
602                 {
603                   hash_unset (am->mac_changes_by_address, address->as_u32);
604                   if (mc->next_index != ~0)
605                     hash_set (am->mac_changes_by_address, address->as_u32,
606                               mc->next_index);
607                   pool_put (am->mac_changes, mc);
608                   return 0;
609                 }
610               else
611                 {
612                   ASSERT(mc_last);
613                   mc_last->next_index = mc->next_index;
614                   pool_put (am->mac_changes, mc);
615                   return 0;
616                 }
617             }
618           mc_last = mc;
619           index = mc->next_index;
620         }
621       
622       return VNET_API_ERROR_NO_SUCH_ENTRY;
623     }
624 }
625
/*
 * Next-node choices for the ARP input node: a packet is either dropped
 * or a reply is sent back to the sender.
 */
typedef enum
{
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_NEXT_REPLY_TX,
  ARP_INPUT_N_NEXT,		/* number of next nodes, keep last */
} arp_input_next_t;
632
/*
 * Counters of the ARP input node as (symbol, description) pairs.
 * The first entry gets the value 0 in the enum generated below.
 */
#define foreach_ethernet_arp_error \
  _ (replies_sent, "ARP replies sent") \
  _ (l2_type_not_ethernet, "L2 type not ethernet") \
  _ (l3_type_not_ip4, "L3 type not IP4") \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned") \
  _ (replies_received, "ARP replies received") \
  _ (opcode_not_request, "ARP opcode not request") \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped")

/* One ETHERNET_ARP_ERROR_<symbol> per entry above, plus the count. */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
  ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
654
655 /* get first interface address */
656 ip4_address_t *
657 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
658                              ip_interface_address_t ** result_ia)
659 {
660   ip_lookup_main_t * lm = &im->lookup_main;
661   ip_interface_address_t * ia = 0;
662   ip4_address_t * result = 0;
663
664   foreach_ip_interface_address (lm, ia, sw_if_index, 
665                                 1 /* honor unnumbered */,
666   ({
667     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
668     result = a;
669     break;
670   }));
671   if (result_ia)
672     *result_ia = result ? ia : 0;
673   return result;
674 }
675
676 static void unset_random_arp_entry (void)
677 {
678   ethernet_arp_main_t * am = &ethernet_arp_main;
679   ethernet_arp_ip4_entry_t * e;
680   vnet_main_t * vnm = vnet_get_main();
681   ethernet_arp_ip4_over_ethernet_address_t delme;  
682   u32 index;
683
684   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
685   am->arp_delete_rotor = index;
686
687   /* Try again from elt 0, could happen if an intfc goes down */
688   if (index == ~0)
689     {
690       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
691       am->arp_delete_rotor = index;
692     }
693
694   /* Nothing left in the pool */
695   if (index == ~0)
696     return;
697
698   e = pool_elt_at_index (am->ip4_entry_pool, index);
699   
700   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
701   delme.ip4.as_u32 = e->key.ip4_address.as_u32;
702   
703   vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
704                                     e->key.fib_index, &delme);
705 }
706   
707 static void arp_unnumbered (vlib_buffer_t * p0, 
708                        u32 pi0,
709                        ethernet_header_t * eth0,
710                        ip_interface_address_t * ifa0)
711 {
712   vlib_main_t * vm = vlib_get_main();
713   vnet_main_t * vnm = vnet_get_main();
714   vnet_interface_main_t * vim = &vnm->interface_main;
715   vnet_sw_interface_t * si;
716   vnet_hw_interface_t * hi;
717   u32 unnum_src_sw_if_index;
718   u32 * broadcast_swifs = 0;
719   u32 * buffers = 0;
720   u32 n_alloc = 0;
721   vlib_buffer_t * b0;
722   int i;
723   u8 dst_mac_address[6];
724   i16 header_size;
725   ethernet_arp_header_t * arp0;
726
727   /* Save the dst mac address */
728   clib_memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));
729
730   /* Figure out which sw_if_index supplied the address */
731   unnum_src_sw_if_index = ifa0->sw_if_index;
732
733   /* Track down all users of the unnumbered source */
734   pool_foreach (si, vim->sw_interfaces, 
735   ({
736     if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
737         (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
738       {
739         vec_add1 (broadcast_swifs, si->sw_if_index);
740       }
741   }));
742           
743
744   ASSERT (vec_len(broadcast_swifs));
745
746   /* Allocate buffering if we need it */
747   if (vec_len(broadcast_swifs) > 1)
748     {
749       vec_validate (buffers, vec_len(broadcast_swifs)-2);
750       n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers));
751       _vec_len (buffers) = n_alloc;
752       for (i = 0; i < n_alloc; i++)
753         {
754           b0 = vlib_get_buffer (vm, buffers[i]);
755
756           /* xerox (partially built) ARP pkt */
757           clib_memcpy (b0->data, p0->data, p0->current_length + p0->current_data);
758           b0->current_data = p0->current_data;
759           b0->current_length = p0->current_length;
760           vnet_buffer(b0)->sw_if_index[VLIB_RX] =
761             vnet_buffer(p0)->sw_if_index[VLIB_RX];
762         }
763     }
764
765   vec_insert (buffers, 1, 0);
766   buffers[0] = pi0;
767   
768   for (i = 0; i < vec_len(buffers); i++)
769     {
770       b0 = vlib_get_buffer(vm, buffers[i]);
771       arp0 = vlib_buffer_get_current (b0);
772
773       hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
774       si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);
775
776       /* For decoration, most likely */
777       vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;
778
779       /* Fix ARP pkt src address */
780       clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);
781
782       /* Build L2 encaps for this swif */
783       header_size = sizeof (ethernet_header_t);
784       if (si->sub.eth.flags.one_tag) 
785         header_size += 4;
786       else if (si->sub.eth.flags.two_tags)
787         header_size += 8;
788       
789       vlib_buffer_advance (b0, -header_size);
790       eth0 = vlib_buffer_get_current (b0);
791
792       if (si->sub.eth.flags.one_tag) {
793         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
794         
795         eth0->type = si->sub.eth.flags.dot1ad ?
796           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
797           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
798         outer->priority_cfi_and_id = 
799           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
800         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
801         
802       } else if (si->sub.eth.flags.two_tags) {
803         ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
804         ethernet_vlan_header_t * inner = (void *) (outer + 1);
805         
806         eth0->type = si->sub.eth.flags.dot1ad ?
807           clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
808           clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
809         outer->priority_cfi_and_id = 
810           clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
811         outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
812         inner->priority_cfi_and_id = 
813           clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
814         inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
815         
816       } else {
817         eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
818       }
819       
820       /* Restore the original dst address, set src address */
821       clib_memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address));
822       clib_memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address));
823       
824       /* Transmit replicas */
825       if (i > 0)
826         {
827           vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
828           u32 * to_next = vlib_frame_vector_args (f);
829           to_next[0] = buffers[i];
830           f->n_vectors = 1;
831           vlib_put_frame_to_node (vm, hi->output_node_index, f);
832         }
833     }
834
835   /* The regular path outputs the original pkt.. */
836   vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
837
838   vec_free (broadcast_swifs);
839   vec_free (buffers);
840 }
841
842 static uword
843 arp_input (vlib_main_t * vm,
844            vlib_node_runtime_t * node,
845            vlib_frame_t * frame)
846 {
847   ethernet_arp_main_t * am = &ethernet_arp_main;
848   vnet_main_t * vnm = vnet_get_main();
849   ip4_main_t * im4 = &ip4_main;
850   u32 n_left_from, next_index, * from, * to_next;
851   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
852
853   from = vlib_frame_vector_args (frame);
854   n_left_from = frame->n_vectors;
855   next_index = node->cached_next_index;
856
857   if (node->flags & VLIB_NODE_FLAG_TRACE)
858     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
859                                    /* stride */ 1,
860                                    sizeof (ethernet_arp_input_trace_t));
861
862   while (n_left_from > 0)
863     {
864       u32 n_left_to_next;
865
866       vlib_get_next_frame (vm, node, next_index,
867                            to_next, n_left_to_next);
868
869       while (n_left_from > 0 && n_left_to_next > 0)
870         {
871           vlib_buffer_t * p0;
872           vnet_hw_interface_t * hw_if0;
873           ethernet_arp_header_t * arp0;
874           ethernet_header_t * eth0;
875           ip_interface_address_t * ifa0;
876           ip_adjacency_t * adj0;
877           ip4_address_t * if_addr0;
878           ip4_address_t proxy_src;
879           u32 pi0, error0, next0, sw_if_index0;
880           u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
881           ethernet_proxy_arp_t * pa;
882
883           pi0 = from[0];
884           to_next[0] = pi0;
885           from += 1;
886           to_next += 1;
887           n_left_from -= 1;
888           n_left_to_next -= 1;
889
890           p0 = vlib_get_buffer (vm, pi0);
891           arp0 = vlib_buffer_get_current (p0);
892
893           is_request0 = arp0->opcode 
894               == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
895
896           error0 = ETHERNET_ARP_ERROR_replies_sent;
897
898           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
899                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
900                     : error0);
901           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
902                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
903                     : error0);
904
905           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
906
907           if (error0)
908             goto drop1;
909
910           /* Check that IP address is local and matches incoming interface. */
911           if_addr0 = ip4_interface_address_matching_destination (im4,
912                                                                  &arp0->ip4_over_ethernet[1].ip4,
913                                                                  sw_if_index0,
914                                                                  &ifa0);
915           if (! if_addr0)
916             {
917               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
918               goto drop1;
919             }
920
921           /* Honor unnumbered interface, if any */
922           is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
923
924           /* Source must also be local to subnet of matching interface address. */
925           if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
926             {
927               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
928               goto drop1;
929             }
930
931           /* Reject requests/replies with our local interface address. */
932           src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
933           if (src_is_local0)
934             {
935               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
936               goto drop1;
937             }
938
939           dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
940
941           /* Fill in ethernet header. */
942           eth0 = ethernet_buffer_get_header (p0);
943
944           /* Trash ARP packets whose ARP-level source addresses do not
945              match their L2-frame-level source addresses */
946           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
947                       sizeof (eth0->src_address)))
948             {
949               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
950               goto drop2;
951             }
952
953           /* Learn or update sender's mapping only for requests or unicasts
954              that don't match local interface address. */
955           if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST
956               || is_request0)
957             {
958               if (am->limit_arp_cache_size && 
959                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
960                 unset_random_arp_entry();
961
962               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, 
963                                               (u32)~0 /* default fib */,
964                                               &arp0->ip4_over_ethernet[0], 
965                                               0 /* is_static */);
966               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
967              }
968
969           /* Only send a reply for requests sent which match a local interface. */
970           if (! (is_request0 && dst_is_local0))
971             {
972               error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)
973                         ? ETHERNET_ARP_ERROR_replies_received : error0);
974               goto drop1;
975             }
976
977           /* Send a reply. */
978         send_reply:
979           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
980           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
981
982           /* Send reply back through input interface */
983           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
984           next0 = ARP_INPUT_NEXT_REPLY_TX;
985
986           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
987
988           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
989
990           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6);
991           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32;
992
993           /* Hardware must be ethernet-like. */
994           ASSERT (vec_len (hw_if0->hw_address) == 6);
995
996           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
997           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
998
999           /* Figure out how much to rewind current data from adjacency. */
1000           if (ifa0)
1001             {
1002               adj0 = ip_get_adjacency (&ip4_main.lookup_main, 
1003                                        ifa0->neighbor_probe_adj_index);
1004               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1005                 {
1006                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1007                   goto drop2;
1008                 }
1009               if (is_unnum0)
1010                 arp_unnumbered (p0, pi0, eth0, ifa0);
1011               else
1012                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1013             }
1014
1015           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1016                                            n_left_to_next,pi0,next0);
1017
1018           n_replies_sent += 1;
1019           continue;
1020
1021         drop1:
1022           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1023               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1024                arp0->ip4_over_ethernet[1].ip4.as_u32))
1025             {
1026               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1027               goto drop2;
1028             }
1029           /* See if proxy arp is configured for the address */
1030           if (is_request0) 
1031             {
1032               vnet_sw_interface_t * si;
1033               u32 this_addr = clib_net_to_host_u32 
1034                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1035               u32 fib_index0;
1036
1037               si = vnet_get_sw_interface (vnm, sw_if_index0);
1038               
1039               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1040                 goto drop2;
1041
1042               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, 
1043                                     sw_if_index0);
1044
1045               vec_foreach (pa, am->proxy_arps)
1046                 {
1047                   u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1048                   u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1049
1050                    /* an ARP request hit in the proxy-arp table? */
1051                    if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1052                        (fib_index0 == pa->fib_index))
1053                     {
1054                       eth0 = ethernet_buffer_get_header (p0);
1055                       proxy_src.as_u32 = 
1056                         arp0->ip4_over_ethernet[1].ip4.data_u32;
1057
1058                       /* 
1059                        * Rewind buffer, direct code above not to
1060                        * think too hard about it. 
1061                        * $$$ is the answer ever anything other than
1062                        * vlib_buffer_reset(..)?
1063                        */
1064                       ifa0 = 0;
1065                       if_addr0 = &proxy_src;
1066                       vlib_buffer_reset (p0);
1067                       n_proxy_arp_replies_sent++;
1068                       goto send_reply;
1069                     }
1070                 }
1071             }
1072           
1073         drop2:
1074
1075           next0 = ARP_INPUT_NEXT_DROP;
1076           p0->error = node->errors[error0];
1077
1078           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1079                                            n_left_to_next,pi0,next0);
1080         }
1081
1082       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1083     }
1084
1085   vlib_error_count (vm, node->node_index,
1086                     ETHERNET_ARP_ERROR_replies_sent, 
1087                     n_replies_sent - n_proxy_arp_replies_sent);
1088   
1089   vlib_error_count (vm, node->node_index,
1090                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent, 
1091                     n_proxy_arp_replies_sent);
1092   return frame->n_vectors;
1093 }
1094
1095 static char * ethernet_arp_error_strings[] = {
1096 #define _(sym,string) string,
1097   foreach_ethernet_arp_error
1098 #undef _
1099 };
1100
1101 VLIB_REGISTER_NODE (arp_input_node,static) = {
1102   .function = arp_input,
1103   .name = "arp-input",
1104   .vector_size = sizeof (u32),
1105
1106   .n_errors = ETHERNET_ARP_N_ERROR,
1107   .error_strings = ethernet_arp_error_strings,
1108
1109   .n_next_nodes = ARP_INPUT_N_NEXT,
1110   .next_nodes = {
1111     [ARP_INPUT_NEXT_DROP] = "error-drop",
1112     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1113   },
1114
1115   .format_buffer = format_ethernet_arp_header,
1116   .format_trace = format_ethernet_arp_input_trace,
1117 };
1118
1119 static int
1120 ip4_arp_entry_sort (void *a1, void *a2)
1121 {
1122   ethernet_arp_ip4_entry_t * e1 = a1;
1123   ethernet_arp_ip4_entry_t * e2 = a2;
1124
1125   int cmp;
1126   vnet_main_t * vnm = vnet_get_main();
1127
1128   cmp = vnet_sw_interface_compare 
1129     (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
1130   if (! cmp)
1131     cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
1132   return cmp;
1133 }
1134
1135 static clib_error_t *
1136 show_ip4_arp (vlib_main_t * vm,
1137               unformat_input_t * input,
1138               vlib_cli_command_t * cmd)
1139 {
1140   vnet_main_t * vnm = vnet_get_main();
1141   ethernet_arp_main_t * am = &ethernet_arp_main;
1142   ethernet_arp_ip4_entry_t * e, * es;
1143   ethernet_proxy_arp_t * pa;
1144   clib_error_t * error = 0;
1145   u32 sw_if_index;
1146
1147   /* Filter entries by interface if given. */
1148   sw_if_index = ~0;
1149   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1150
1151   es = 0;
1152   pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); }));
1153   if ( es )
1154     {
1155       vec_sort_with_function (es, ip4_arp_entry_sort);
1156       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1157       vec_foreach (e, es) {
1158         if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
1159           continue;
1160         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1161       }
1162       vec_free (es);
1163     }
1164
1165   if (vec_len (am->proxy_arps))
1166     {
1167       vlib_cli_output (vm, "Proxy arps enabled for:");
1168       vec_foreach(pa, am->proxy_arps)
1169         {
1170           vlib_cli_output (vm, "Fib_index %d   %U - %U ", 
1171                            pa->fib_index,
1172                            format_ip4_address, &pa->lo_addr, 
1173                            format_ip4_address, &pa->hi_addr);
1174         }
1175     }
1176       
1177   return error;
1178 }
1179
1180 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1181   .path = "show ip arp",
1182   .function = show_ip4_arp,
1183   .short_help = "Show ARP table",
1184 };
1185
1186 typedef struct {
1187   pg_edit_t l2_type, l3_type;
1188   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1189   pg_edit_t opcode;
1190   struct {
1191     pg_edit_t ethernet;
1192     pg_edit_t ip4;
1193   } ip4_over_ethernet[2];
1194 } pg_ethernet_arp_header_t;
1195
1196 static inline void
1197 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1198 {
1199   /* Initialize fields that are not bit fields in the IP header. */
1200 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1201   _ (l2_type);
1202   _ (l3_type);
1203   _ (n_l2_address_bytes);
1204   _ (n_l3_address_bytes);
1205   _ (opcode);
1206   _ (ip4_over_ethernet[0].ethernet);
1207   _ (ip4_over_ethernet[0].ip4);
1208   _ (ip4_over_ethernet[1].ethernet);
1209   _ (ip4_over_ethernet[1].ip4);
1210 #undef _
1211 }
1212
1213 uword
1214 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1215 {
1216   pg_stream_t * s = va_arg (*args, pg_stream_t *);
1217   pg_ethernet_arp_header_t * p;
1218   u32 group_index;
1219   
1220   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1221                             &group_index);
1222   pg_ethernet_arp_header_init (p);
1223
1224   /* Defaults. */
1225   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1226   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1227   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1228   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1229
1230   if (! unformat (input, "%U: %U/%U -> %U/%U",
1231                   unformat_pg_edit,
1232                   unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1233                   unformat_pg_edit,
1234                   unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1235                   unformat_pg_edit,
1236                   unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1237                   unformat_pg_edit,
1238                   unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1239                   unformat_pg_edit,
1240                   unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1241     {
1242       /* Free up any edits we may have added. */
1243       pg_free_edit_group (s);
1244       return 0;
1245     }
1246   return 1;
1247 }
1248
1249 clib_error_t *ip4_set_arp_limit (u32 arp_limit)
1250 {
1251   ethernet_arp_main_t * am = &ethernet_arp_main;
1252
1253   am->limit_arp_cache_size = arp_limit;
1254   return 0;
1255 }
1256
1257 static void
1258 arp_ip4_entry_del_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1259 {
1260   int done = 0;
1261   int i;
1262
1263   while (!done)
1264     {
1265       vec_foreach_index(i, e->adjacencies)
1266         if (vec_elt(e->adjacencies, i) == adj_index)
1267           {
1268             vec_del1(e->adjacencies, i);
1269             continue;
1270           }
1271       done = 1;
1272     }
1273 }
1274
1275 static void
1276 arp_ip4_entry_add_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1277 {
1278   int i;
1279   vec_foreach_index(i, e->adjacencies)
1280     if (vec_elt(e->adjacencies, i) == adj_index)
1281       return;
1282   vec_add1(e->adjacencies, adj_index);
1283 }
1284
1285 static void
1286 arp_add_del_adj_cb (struct ip_lookup_main_t * lm,
1287                     u32 adj_index,
1288                     ip_adjacency_t * adj,
1289                     u32 is_del)
1290 {
1291   ethernet_arp_main_t * am = &ethernet_arp_main;
1292   ip4_main_t * im = &ip4_main;
1293   ethernet_arp_ip4_key_t k;
1294   ethernet_arp_ip4_entry_t * e = 0;
1295   uword * p;
1296   u32 ai;
1297
1298   for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++)
1299     {
1300       adj = ip_get_adjacency (lm, ai);
1301       if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && adj->arp.next_hop.ip4.as_u32)
1302         {
1303           k.sw_if_index = adj->rewrite_header.sw_if_index;
1304           k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32;
1305           k.fib_index = im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index];
1306           p = mhash_get (&am->ip4_entry_by_key, &k);
1307           if (p)
1308             e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1309         }
1310       else
1311         continue;
1312
1313       if (is_del)
1314         {
1315           if (!e)
1316             clib_warning("Adjacency contains unknown ARP next hop %U (del)",
1317                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1318           else
1319             arp_ip4_entry_del_adj(e, adj->heap_handle);
1320         }
1321       else /* add */
1322         {
1323           if (!e)
1324             clib_warning("Adjacency contains unknown ARP next hop %U (add)",
1325                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1326           else
1327             arp_ip4_entry_add_adj(e, adj->heap_handle);
1328         }
1329     }
1330 }
1331
1332 static clib_error_t * ethernet_arp_init (vlib_main_t * vm)
1333 {
1334   ethernet_arp_main_t * am = &ethernet_arp_main;
1335   pg_node_t * pn;
1336   clib_error_t * error;
1337   ip4_main_t * im = &ip4_main;
1338   ip_lookup_main_t * lm = &im->lookup_main;
1339
1340   if ((error = vlib_call_init_function (vm, ethernet_init)))
1341     return error;
1342
1343   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1344
1345   pn = pg_get_node (arp_input_node.index);
1346   pn->unformat_edit = unformat_pg_arp_header;
1347
1348   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1349 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1350   foreach_ethernet_arp_opcode;
1351 #undef _
1352
1353   mhash_init (&am->ip4_entry_by_key,
1354               /* value size */ sizeof (uword),
1355               /* key size */ sizeof (ethernet_arp_ip4_key_t));
1356
1357   /* $$$ configurable */
1358   am->limit_arp_cache_size = 50000;
1359
1360   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1361   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1362
1363   /* don't trace ARP error packets */
1364   {
1365     vlib_node_runtime_t *rt = 
1366       vlib_node_get_runtime (vm, arp_input_node.index);
1367
1368 #define _(a,b)                                  \
1369     vnet_pcap_drop_trace_filter_add_del         \
1370         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1371          1 /* is_add */);
1372     foreach_ethernet_arp_error
1373 #undef _
1374   }
1375
1376   ip_register_add_del_adjacency_callback(lm, arp_add_del_adj_cb);
1377
1378   return 0;
1379 }
1380
1381 VLIB_INIT_FUNCTION (ethernet_arp_init);
1382
1383 int 
1384 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1385                                   u32 sw_if_index, u32 fib_index,
1386                                   void * a_arg)
1387 {
1388   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1389   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1390
1391   args.sw_if_index = sw_if_index;
1392   args.fib_index = fib_index;
1393   args.is_remove = 1;
1394   clib_memcpy (&args.a, a, sizeof (*a));
1395
1396   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
1397                                (u8 *) &args, sizeof (args));
1398   return 0;
1399 }
1400
1401 static inline int 
1402 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1403                                            u32 sw_if_index, 
1404                                            u32 fib_index,
1405                                            void * a_arg)
1406 {
1407   ethernet_arp_ip4_entry_t * e;
1408   ethernet_arp_main_t * am = &ethernet_arp_main;
1409   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1410   ethernet_arp_ip4_key_t k;
1411   uword * p;
1412   ip4_add_del_route_args_t args;
1413   ip4_main_t * im = &ip4_main;
1414   ip_lookup_main_t * lm = &im->lookup_main;
1415   u32 adj_index;
1416   ip_adjacency_t * adj;
1417
1418   k.sw_if_index = sw_if_index;
1419   k.ip4_address = a->ip4;
1420   k.fib_index = fib_index;
1421   p = mhash_get (&am->ip4_entry_by_key, &k);
1422   if (! p)
1423     return -1;
1424
1425   memset(&args, 0, sizeof(args));
1426
1427   /* 
1428    * Make sure that the route actually exists before we try to delete it,
1429    * and make sure that it's a rewrite adjacency.
1430    *
1431    * If we point 1-N unnumbered interfaces at a loopback interface and 
1432    * shut down the loopback before shutting down 1-N unnumbered 
1433    * interfaces, the ARP cache will still have an entry, 
1434    * but the route will have disappeared.
1435    * 
1436    * See also ip4_del_interface_routes (...) 
1437    *            -> ip4_delete_matching_routes (...).
1438    */
1439   
1440   adj_index = ip4_fib_lookup_with_table 
1441       (im, fib_index, &a->ip4, 1 /* disable default route */);
1442
1443   /* Miss adj? Forget it... */
1444   if (adj_index != lm->miss_adj_index) {
1445       adj = ip_get_adjacency (lm, adj_index);
1446       /* 
1447        * Stupid control-plane trick:
1448        * admin down an interface (removes arp routes from fib),
1449        * bring the interface back up (does not reinstall them)
1450        * then remove the arp cache entry (yuck). When that happens,
1451        * the adj we find here will be the interface subnet ARP adj.
1452        */
1453       if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) {
1454           args.table_index_or_table_id = fib_index;
1455           args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL 
1456               | IP4_ROUTE_FLAG_NEIGHBOR;
1457           args.dst_address = a->ip4;
1458           args.dst_address_length = 32;
1459           ip4_add_del_route (im, &args);
1460           ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
1461       }
1462   }
1463
1464   e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1465   mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
1466   pool_put (am->ip4_entry_pool, e);
1467   return 0;
1468 }
1469
1470 static void 
1471 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a)
1472 {
1473   u8 old;
1474   int i;
1475
1476   for (i = 3; i >= 0; i--) 
1477     {
1478       old = a->ip4.as_u8[i];
1479       a->ip4.as_u8[i] += 1;
1480       if (old < a->ip4.as_u8[i])
1481         break;
1482     }
1483
1484   for (i = 5; i >= 0; i--)
1485     {
1486       old = a->ethernet[i];
1487       a->ethernet[i] += 1;
1488       if (old < a->ethernet[i])
1489         break;
1490     }
1491 }
1492
1493 int vnet_proxy_arp_add_del (ip4_address_t *lo_addr,
1494                             ip4_address_t *hi_addr,
1495                             u32 fib_index, int is_del)
1496 {
1497   ethernet_arp_main_t *am = &ethernet_arp_main;
1498   ethernet_proxy_arp_t *pa;
1499   u32 found_at_index = ~0;
1500
1501   vec_foreach (pa, am->proxy_arps)
1502     {
1503       if (pa->lo_addr == lo_addr->as_u32 
1504           && pa->hi_addr == hi_addr->as_u32
1505           && pa->fib_index == fib_index)
1506         {
1507           found_at_index = pa - am->proxy_arps;
1508           break;
1509         }
1510     }
1511
1512   if (found_at_index != ~0)
1513     {
1514       /* Delete, otherwise it's already in the table */
1515       if (is_del)
1516         vec_delete (am->proxy_arps, 1, found_at_index);
1517       return 0;
1518     }
1519   /* delete, no such entry */
1520   if (is_del)
1521     return VNET_API_ERROR_NO_SUCH_ENTRY;
1522
1523   /* add, not in table */
1524   vec_add2 (am->proxy_arps, pa, 1);
1525   pa->lo_addr = lo_addr->as_u32;
1526   pa->hi_addr = hi_addr->as_u32;
1527   pa->fib_index = fib_index;
1528   return 0;
1529 }
1530
1531 /*
1532  * Remove any proxy arp entries asdociated with the 
1533  * specificed fib.
1534  */
1535 int vnet_proxy_arp_fib_reset (u32 fib_id)
1536 {
1537   ip4_main_t * im = &ip4_main;
1538   ethernet_arp_main_t *am = &ethernet_arp_main;
1539   ethernet_proxy_arp_t *pa;
1540   u32 * entries_to_delete = 0;
1541   u32 fib_index;
1542   uword * p;
1543   int i;
1544
1545   p = hash_get (im->fib_index_by_table_id, fib_id);
1546   if (! p)
1547       return VNET_API_ERROR_NO_SUCH_ENTRY;
1548   fib_index = p[0];
1549
1550   vec_foreach (pa, am->proxy_arps)
1551     {
1552       if (pa->fib_index == fib_index)
1553         {
1554           vec_add1 (entries_to_delete, pa - am->proxy_arps);
1555         }
1556     }
1557
1558   for (i = 0; i < vec_len(entries_to_delete); i++)
1559     {
1560        vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1561     } 
1562
1563   vec_free (entries_to_delete);
1564
1565    return 0;
1566 }
1567
1568 u32
1569 vnet_arp_glean_add(u32 fib_index, void * next_hop_arg)
1570 {
1571   ethernet_arp_main_t * am = &ethernet_arp_main;
1572   ip4_main_t * im = &ip4_main;
1573   ip_lookup_main_t * lm = &im->lookup_main;
1574   ip4_address_t * next_hop = next_hop_arg;
1575   ip_adjacency_t add_adj, *adj;
1576   ip4_add_del_route_args_t args;
1577   ethernet_arp_ip4_entry_t * e;
1578   ethernet_arp_ip4_key_t k;
1579   u32 adj_index;
1580
1581   adj_index = ip4_fib_lookup_with_table(im, fib_index, next_hop, 0);
1582   adj = ip_get_adjacency(lm, adj_index);
1583
1584   if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1585     return ~0;
1586
1587   if (adj->arp.next_hop.ip4.as_u32 != 0)
1588     return adj_index;
1589
1590   k.sw_if_index = adj->rewrite_header.sw_if_index;
1591   k.fib_index = fib_index;
1592   k.ip4_address.as_u32 = next_hop->as_u32;
1593
1594   if (mhash_get (&am->ip4_entry_by_key, &k))
1595     return adj_index;
1596
1597   pool_get (am->ip4_entry_pool, e);
1598   mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, /* old value */ 0);
1599   e->key = k;
1600   e->cpu_time_last_updated = clib_cpu_time_now ();
1601   e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN;
1602
1603   memset(&args, 0, sizeof(args));
1604   clib_memcpy(&add_adj, adj, sizeof(add_adj));
1605   ip46_address_set_ip4(&add_adj.arp.next_hop, next_hop); /* install neighbor /32 route */
1606   args.table_index_or_table_id = fib_index;
1607   args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD| IP4_ROUTE_FLAG_NEIGHBOR;
1608   args.dst_address.as_u32 = next_hop->as_u32;
1609   args.dst_address_length = 32;
1610   args.adj_index = ~0;
1611   args.add_adj = &add_adj;
1612   args.n_add_adj = 1;
1613   ip4_add_del_route (im, &args);
1614   return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
1615 }
1616
1617 static clib_error_t *
1618 ip_arp_add_del_command_fn (vlib_main_t * vm,
1619                  unformat_input_t * input,
1620                  vlib_cli_command_t * cmd)
1621 {
1622   vnet_main_t * vnm = vnet_get_main();
1623   u32 sw_if_index;
1624   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1625   int addr_valid = 0;
1626   int is_del = 0;
1627   int count = 1;
1628   u32 fib_index = 0;
1629   u32 fib_id;
1630   int is_static = 0;
1631   int is_proxy = 0;
1632
1633   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1634     {
1635       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1636       if (unformat (input, "%U %U %U",
1637                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1638                     unformat_ip4_address, &addr.ip4, 
1639                     unformat_ethernet_address, &addr.ethernet))
1640         addr_valid = 1;
1641
1642       else if (unformat (input, "delete") || unformat (input, "del"))
1643         is_del = 1;
1644
1645       else if (unformat (input, "static"))
1646         is_static = 1;
1647
1648       else if (unformat (input, "count %d", &count))
1649         ;
1650
1651       else if (unformat (input, "fib-id %d", &fib_id))
1652         {
1653           ip4_main_t * im = &ip4_main;
1654           uword * p = hash_get (im->fib_index_by_table_id, fib_id);
1655           if (! p)
1656             return clib_error_return (0, "fib ID %d doesn't exist\n",
1657                                       fib_id);
1658           fib_index = p[0];
1659         }
1660
1661       else if (unformat (input, "proxy %U - %U", 
1662                          unformat_ip4_address, &lo_addr.ip4, 
1663                          unformat_ip4_address, &hi_addr.ip4))
1664         is_proxy = 1;
1665       else
1666         break;
1667     }
1668   
1669   if (is_proxy)
1670     {
1671       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, 
1672                                      fib_index, is_del);
1673       return 0;
1674     }
1675
1676   if (addr_valid) 
1677     {
1678       int i;
1679
1680       for (i = 0; i < count; i++) 
1681         {
1682           if (is_del == 0) 
1683             {
1684               uword event_type, * event_data = 0;
1685
1686               /* Park the debug CLI until the arp entry is installed */
1687               vnet_register_ip4_arp_resolution_event 
1688                 (vnm, &addr.ip4, vlib_current_process(vm),
1689                  1 /* type */, 0 /* data */);
1690               
1691               vnet_arp_set_ip4_over_ethernet
1692                 (vnm, sw_if_index, fib_index, &addr, is_static);
1693               
1694               vlib_process_wait_for_event (vm);
1695               event_type = vlib_process_get_events (vm, &event_data);
1696               vec_reset_length(event_data);
1697               if (event_type != 1)
1698                 clib_warning ("event type %d unexpected", event_type);
1699             }
1700           else
1701             vnet_arp_unset_ip4_over_ethernet
1702                 (vnm, sw_if_index, fib_index, &addr);
1703
1704           increment_ip4_and_mac_address (&addr);
1705         }
1706     }
1707   else
1708     {
1709       return clib_error_return (0, "unknown input `%U'",
1710                                 format_unformat_error, input);
1711     }
1712   
1713   return 0;
1714 }
1715
1716 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1717     .path = "set ip arp",
1718     .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1719     .function = ip_arp_add_del_command_fn,
1720 };
1721
1722 static clib_error_t *
1723 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1724                               unformat_input_t * input,
1725                               vlib_cli_command_t * cmd)
1726 {
1727   vnet_main_t * vnm = vnet_get_main();
1728   u32 sw_if_index;
1729   vnet_sw_interface_t * si;
1730   int enable = 0;
1731   int intfc_set = 0;
1732
1733   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1734     {
1735       if (unformat (input, "%U", unformat_vnet_sw_interface, 
1736                     vnm, &sw_if_index))
1737         intfc_set = 1;
1738       else if (unformat (input, "enable") || unformat (input, "on"))
1739         enable = 1;
1740       else if (unformat (input, "disable") || unformat (input, "off"))
1741         enable = 0;
1742       else
1743         break;
1744     }
1745
1746   if (intfc_set == 0)
1747     return clib_error_return (0, "unknown input '%U'",
1748                               format_unformat_error, input);
1749
1750   si = vnet_get_sw_interface (vnm, sw_if_index);
1751   ASSERT(si);
1752   if (enable)
1753     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1754   else 
1755     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1756   
1757   return 0;
1758 }
1759
1760 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
1761     .path = "set interface proxy-arp",
1762     .short_help = "set interface proxy-arp <intfc> [enable|disable]",
1763     .function = set_int_proxy_arp_command_fn,
1764 };
1765
1766
1767 /*
1768  * ARP Termination in a L2 Bridge Domain based on an
1769  * IP4 to MAC hash table mac_by_ip4 for each BD.
1770  */
1771 typedef enum {
1772   ARP_TERM_NEXT_L2_OUTPUT,
1773   ARP_TERM_NEXT_DROP,
1774   ARP_TERM_N_NEXT,
1775 } arp_term_next_t;
1776
1777 u32 arp_term_next_node_index[32];
1778
1779 static uword
1780 arp_term_l2bd (vlib_main_t * vm,
1781                vlib_node_runtime_t * node,
1782                vlib_frame_t * frame)
1783 {
1784   l2input_main_t * l2im = &l2input_main;
1785   u32 n_left_from, next_index, * from, * to_next;
1786   u32 n_replies_sent = 0;
1787   u16 last_bd_index = ~0;
1788   l2_bridge_domain_t * last_bd_config = 0;
1789   l2_input_config_t * cfg0;
1790
1791   from = vlib_frame_vector_args (frame);
1792   n_left_from = frame->n_vectors;
1793   next_index = node->cached_next_index;
1794
1795   while (n_left_from > 0)
1796     {
1797       u32 n_left_to_next;
1798
1799       vlib_get_next_frame (vm, node, next_index,
1800                            to_next, n_left_to_next);
1801
1802       while (n_left_from > 0 && n_left_to_next > 0)
1803         {
1804           vlib_buffer_t * p0;
1805           ethernet_header_t * eth0;
1806           ethernet_arp_header_t * arp0;
1807           u8 * l3h0;
1808           u32 pi0, error0, next0, sw_if_index0;
1809           u16 ethertype0;
1810           u16 bd_index0;
1811           u32 ip0;
1812           u8 * macp0;
1813
1814           pi0 = from[0];
1815           to_next[0] = pi0;
1816           from += 1;
1817           to_next += 1;
1818           n_left_from -= 1;
1819           n_left_to_next -= 1;
1820
1821           p0 = vlib_get_buffer (vm, pi0);
1822           eth0 = vlib_buffer_get_current (p0);
1823           l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len;
1824           ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2));
1825           arp0 = (ethernet_arp_header_t *) l3h0;
1826
1827           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
1828                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
1829             {
1830               u8 *t0 = vlib_add_trace (
1831                   vm, node, p0, sizeof(ethernet_arp_input_trace_t));
1832               clib_memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t));
1833             }
1834
1835           if (PREDICT_FALSE  (
1836             (ethertype0 != ETHERNET_TYPE_ARP) ||
1837             (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request))))
1838             goto next_l2_feature;
1839
1840           error0 = ETHERNET_ARP_ERROR_replies_sent;
1841           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
1842                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
1843                     : error0);
1844           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
1845                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
1846                     : error0);
1847
1848           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1849
1850           if (error0)
1851             goto drop;
1852
1853           // Trash ARP packets whose ARP-level source addresses do not
1854           // match their L2-frame-level source addresses */
1855           if (PREDICT_FALSE (
1856             memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1857                     sizeof (eth0->src_address))))
1858             {
1859               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1860               goto drop;
1861             }
1862
1863           // Check if anyone want ARP request events for L2 BDs
1864           {
1865           pending_resolution_t * mc;
1866           ethernet_arp_main_t * am = &ethernet_arp_main;
1867           uword *p = hash_get (am->mac_changes_by_address, 0);
1868           if (p && (vnet_buffer(p0)->l2.shg == 0))
1869             { // Only SHG 0 interface which is more likely local
1870               u32 next_index = p[0];
1871               while (next_index != (u32)~0)
1872                 {
1873                   int (*fp)(u32, u8 *, u32, u32);
1874                   int rv = 1;
1875                   mc = pool_elt_at_index (am->mac_changes, next_index);
1876                   fp = mc->data_callback;
1877                   // Call the callback, return 1 to suppress dup events */
1878                   if (fp) rv = (*fp)(mc->data, 
1879                                      arp0->ip4_over_ethernet[0].ethernet, 
1880                                      sw_if_index0, 
1881                                      arp0->ip4_over_ethernet[0].ip4.as_u32);
1882                   // Signal the resolver process
1883                   if (rv == 0)
1884                     vlib_process_signal_event (vm, mc->node_index,
1885                                                mc->type_opaque, 
1886                                                mc->data);
1887                   next_index = mc->next_index;
1888                 }
1889             }
1890           }
1891
1892           // lookup BD mac_by_ip4 hash table for MAC entry
1893           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
1894           bd_index0 = vnet_buffer(p0)->l2.bd_index;
1895           if (PREDICT_FALSE (
1896             (bd_index0 != last_bd_index) || (last_bd_index == (u16) ~0)))
1897             {
1898               last_bd_index = bd_index0;
1899               last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0);
1900             }
1901           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
1902
1903           if (PREDICT_FALSE(!macp0)) 
1904               goto next_l2_feature;     // MAC not found 
1905
1906           // MAC found, send ARP reply -
1907           // Convert ARP request packet to ARP reply
1908           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1909           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1910           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
1911           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
1912           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1913           clib_memcpy (eth0->src_address, macp0, 6);
1914           n_replies_sent += 1;
1915
1916           // For BVI, need to use l2-fwd node to send ARP reply as 
1917           // l2-output node cannot output packet to BVI properly
1918           cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0);
1919           if (PREDICT_FALSE (cfg0->bvi))
1920             {
1921               vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
1922               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
1923               goto next_l2_feature;
1924             }
1925
1926           // Send ARP reply back out input interface through l2-output
1927           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1928           next0 = ARP_TERM_NEXT_L2_OUTPUT;
1929           // Note that output to VXLAN tunnel will fail due to SHG which
1930           // is probably desireable since ARP termination is not intended
1931           // for ARP requests from other hosts. If output to VXLAN tunnel is
1932           // required, however, can just clear the SHG in packet as follows:
1933           //   vnet_buffer(p0)->l2.shg = 0;
1934
1935           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1936                                            n_left_to_next,pi0,next0);
1937           continue;
1938
1939         next_l2_feature:
1940           {
1941             u32 feature_bitmap0 =
1942                 vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
1943             vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0;
1944             next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index,
1945                                                     feature_bitmap0);
1946             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1947                                              n_left_to_next,pi0,next0);
1948             continue;
1949           }
1950
1951         drop:
1952           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1953               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1954                arp0->ip4_over_ethernet[1].ip4.as_u32))
1955             {
1956               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1957             }
1958           next0 = ARP_TERM_NEXT_DROP;
1959           p0->error = node->errors[error0];
1960
1961           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1962                                            n_left_to_next,pi0,next0);
1963         }
1964
1965       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1966     }
1967
1968   vlib_error_count (vm, node->node_index,
1969                     ETHERNET_ARP_ERROR_replies_sent, 
1970                     n_replies_sent);
1971   return frame->n_vectors;
1972 }
1973
1974 VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = {
1975   .function = arp_term_l2bd,
1976   .name = "arp-term-l2bd",
1977   .vector_size = sizeof (u32),
1978
1979   .n_errors = ETHERNET_ARP_N_ERROR,
1980   .error_strings = ethernet_arp_error_strings,
1981
1982   .n_next_nodes = ARP_TERM_N_NEXT,
1983   .next_nodes = {
1984     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
1985     [ARP_TERM_NEXT_DROP] = "error-drop",
1986   },
1987
1988   .format_buffer = format_ethernet_arp_header,
1989   .format_trace = format_ethernet_arp_input_trace,
1990 };
1991
1992 clib_error_t *arp_term_init (vlib_main_t *vm)
1993 { // Initialize the feature next-node indexes 
1994   feat_bitmap_init_next_nodes(vm,
1995                               arp_term_l2bd_node.index,
1996                               L2INPUT_N_FEAT,
1997                               l2input_get_feat_names(),
1998                               arp_term_next_node_index);
1999   return 0;
2000 }
2001
2002 VLIB_INIT_FUNCTION (arp_term_init);