Add support for installing ipv4 routes via unresolved next hop
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ethernet/ethernet.h>
20 #include <vnet/ethernet/arp_packet.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vppinfra/mhash.h>
23
/* Prototype only; the definition is not in this file.  It is used below by
   vnet_arp_set_ip4_over_ethernet() to hand a callback together with a copy
   of its argument block (data / data_length bytes) to the RPC layer.
   NOTE(review): presumably the callback is then run on the main thread, as
   the name suggests -- confirm against the definition. */
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
25
/* Lookup key of an ARP cache entry.  Entries are located through the
   ip4_entry_by_key mhash using this (interface, FIB, address) triple. */
typedef struct {
  u32 sw_if_index;              /* software interface the entry belongs to */
  u32 fib_index;                /* FIB index the address lives in */
  ip4_address_t ip4_address;    /* IPv4 address being resolved */
} ethernet_arp_ip4_key_t;
31
32 typedef struct {
33   ethernet_arp_ip4_key_t key;
34   u8 ethernet_address[6];
35
36   u16 flags;
37 #define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
38 #define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN  (2 << 0)
39
40   u64 cpu_time_last_updated;
41
42   u32 * adjacencies;
43 } ethernet_arp_ip4_entry_t;
44
/* One proxy-ARP configuration record; records are kept in the
   ethernet_arp_main_t.proxy_arps vector.
   NOTE(review): lo_addr/hi_addr look like the inclusive bounds of an IPv4
   address range -- byte order and inclusiveness are not visible here,
   confirm against the code that fills and searches the vector. */
typedef struct {
  u32 lo_addr;
  u32 hi_addr;
  u32 fib_index;
} ethernet_proxy_arp_t;
50
/* A registered listener, used both for "tell me when this address resolves"
   (pending_resolutions pool) and for "tell me when its MAC changes"
   (mac_changes pool).  Listeners for one address form a singly linked list
   threaded through next_index. */
typedef struct {
  /* Pool index of the next listener for the same address, ~0 ends the list */
  u32 next_index;
  /* Arguments handed to vlib_process_signal_event() when the event fires */
  uword node_index;
  uword type_opaque;
  uword data;
  /* Used for arp event notification only */
  void * data_callback;
  /* Compared (with node_index and type_opaque) to find the listener to
     remove in vnet_add_del_ip4_arp_change_event() */
  u32 pid;
} pending_resolution_t;
60
/* All state owned by the ARP code in this file. */
typedef struct {
  /* Hash tables mapping name to opcode. */
  uword * opcode_by_name;

  /* Listeners waiting for an address to resolve: hash from IPv4 address
     (as_u32) to the pool index of the first listener in
     pending_resolutions. */
  uword * pending_resolutions_by_address;
  pending_resolution_t * pending_resolutions;

  /* Mac address change notification: same layout as above, hash from IPv4
     address (as_u32) to the head index in the mac_changes pool. */
  uword * mac_changes_by_address;
  pending_resolution_t * mac_changes;

  /* Vector indexed by hw_if_index, giving the next index arp_unnumbered()
     returns for a packet on that hardware interface */
  u32 * arp_input_next_index_by_hw_if_index;

  /* Pool of ARP cache entries */
  ethernet_arp_ip4_entry_t * ip4_entry_pool;

  /* Maps ethernet_arp_ip4_key_t to an index in ip4_entry_pool */
  mhash_t ip4_entry_by_key;
    
  /* ARP attack mitigation: arp_delete_rotor is the pool index at which
     unset_random_arp_entry() resumes its walk.
     NOTE(review): limit_arp_cache_size is presumably the entry count at
     which eviction starts -- its use is outside this chunk. */
  u32 arp_delete_rotor;
  u32 limit_arp_cache_size;

  /* Proxy arp vector */
  ethernet_proxy_arp_t * proxy_arps;
} ethernet_arp_main_t;
86
/* The single, file-private instance of the ARP state. */
static ethernet_arp_main_t ethernet_arp_main;
88
89 static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va)
90 {
91   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
92   char * t = 0;
93   switch (h)
94     {
95 #define _(n,f) case n: t = #f; break;
96       foreach_ethernet_arp_hardware_type;
97 #undef _
98
99     default:
100       return format (s, "unknown 0x%x", h);
101     }
102
103   return format (s, "%s", t);
104 }
105
106 static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va)
107 {
108   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
109   char * t = 0;
110   switch (o)
111     {
112 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
113       foreach_ethernet_arp_opcode;
114 #undef _
115
116     default:
117       return format (s, "unknown 0x%x", o);
118     }
119
120   return format (s, "%s", t);
121 }
122
123 static uword
124 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
125                                               va_list * args)
126 {
127   int * result = va_arg (*args, int *);
128   ethernet_arp_main_t * am = &ethernet_arp_main;
129   int x, i;
130
131   /* Numeric opcode. */
132   if (unformat (input, "0x%x", &x)
133       || unformat (input, "%d", &x))
134     {
135       if (x >= (1 << 16))
136         return 0;
137       *result = x;
138       return 1;
139     }
140
141   /* Named type. */
142   if (unformat_user (input, unformat_vlib_number_by_name,
143                      am->opcode_by_name, &i))
144     {
145       *result = i;
146       return 1;
147     }
148
149   return 0;
150 }
151
152 static uword
153 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
154                                              va_list * args)
155 {
156   int * result = va_arg (*args, int *);
157   if (! unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result))
158     return 0;
159
160   *result = clib_host_to_net_u16 ((u16) *result);
161   return 1;
162 }
163
164 static u8 * format_ethernet_arp_header (u8 * s, va_list * va)
165 {
166   ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *);
167   u32 max_header_bytes = va_arg (*va, u32);
168   uword indent;
169   u16 l2_type, l3_type;
170
171   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
172     return format (s, "ARP header truncated");
173
174   l2_type = clib_net_to_host_u16 (a->l2_type);
175   l3_type = clib_net_to_host_u16 (a->l3_type);
176
177   indent = format_get_indent (s);
178
179   s = format (s, "%U, type %U/%U, address size %d/%d",
180               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
181               format_ethernet_arp_hardware_type, l2_type,
182               format_ethernet_type, l3_type,
183               a->n_l2_address_bytes, a->n_l3_address_bytes);
184               
185   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
186       && l3_type == ETHERNET_TYPE_IP4)
187     {
188       s = format (s, "\n%U%U/%U -> %U/%U",
189                   format_white_space, indent,
190                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
191                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
192                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
193                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
194     }
195   else
196     {
197       uword n2 = a->n_l2_address_bytes;
198       uword n3 = a->n_l3_address_bytes;
199       s = format (s, "\n%U%U/%U -> %U/%U",
200                   format_white_space, indent,
201                   format_hex_bytes, a->data + 0*n2 + 0*n3, n2,
202                   format_hex_bytes, a->data + 1*n2 + 0*n3, n3,
203                   format_hex_bytes, a->data + 1*n2 + 1*n3, n2,
204                   format_hex_bytes, a->data + 2*n2 + 1*n3, n3);
205     }
206
207   return s;
208 }
209
210 static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
211 {
212   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
213   ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *);
214   vnet_sw_interface_t * si;
215   ip4_fib_t * fib;
216   u8 * flags = 0;
217
218   if (! e)
219     return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4",
220                    "Flags", "Ethernet", "Interface");
221
222   fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
223                                            IP4_ROUTE_FLAG_FIB_INDEX);
224   si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
225
226   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN)
227     flags = format(flags, "G");
228
229   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
230     flags = format(flags, "S");
231
232   s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U",
233               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
234               fib->table_id,
235               format_ip4_address, &e->key.ip4_address,
236               flags ? (char *) flags : "",
237               format_ethernet_address, e->ethernet_address,
238               format_vnet_sw_interface_name, vnm, si);
239
240   vec_free(flags);
241   return s;
242 }
243
/* Per-packet trace record of the ARP input node: the leading bytes of the
   packet, later decoded by format_ethernet_arp_input_trace(). */
typedef struct {
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
247
248 static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va)
249 {
250   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
251   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
252   ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *);
253
254   s = format (s, "%U",
255               format_ethernet_arp_header,
256               t->packet_data, sizeof (t->packet_data));
257
258   return s;
259 }
260
261 clib_error_t *
262 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
263                                    u32 sw_if_index,
264                                    u32 flags)
265 {
266   ethernet_arp_main_t * am = &ethernet_arp_main;
267   ethernet_arp_ip4_entry_t * e;
268
269   if (! (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
270     {
271       u32 i, * to_delete = 0;
272
273       pool_foreach (e, am->ip4_entry_pool, ({
274         if (e->key.sw_if_index == sw_if_index)
275           vec_add1 (to_delete, e - am->ip4_entry_pool);
276       }));
277
278       for (i = 0; i < vec_len (to_delete); i++)
279         {
280           ethernet_arp_ip4_over_ethernet_address_t delme;
281           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
282
283           memcpy (&delme.ethernet, e->ethernet_address, 6);
284           delme.ip4.as_u32 = e->key.ip4_address.as_u32;
285
286           vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
287                                             e->key.fib_index, &delme);
288         }
289
290       vec_free (to_delete);
291     }
292
293   return 0;
294 }
295
296 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
297
/* Forward declarations of the workers invoked by
   set_ip4_over_ethernet_rpc_callback() below.  a_arg points to an
   ethernet_arp_ip4_over_ethernet_address_t (IPv4 address plus MAC). */

/* Adds or refreshes the ARP cache entry for a_arg. */
static int
vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                         u32 sw_if_index,
                                         u32 fib_index,
                                         void * a_arg,
                                         int is_static);

/* Counterpart selected when the RPC arguments have is_remove set; the
   definition is outside this part of the file. */
static int
vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                           u32 sw_if_index,
                                           u32 fib_index,
                                           void * a_arg);
310
/* Argument block copied through vl_api_rpc_call_main_thread() to
   set_ip4_over_ethernet_rpc_callback(). */
typedef struct {
  u32 sw_if_index;
  u32 fib_index;
  ethernet_arp_ip4_over_ethernet_address_t a; /* IPv4 address and MAC */
  int is_static; /* forwarded to the set worker, ignored on removal */
  int is_remove; /* set is_remove=1 to clear arp entry */
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
318
319 static void set_ip4_over_ethernet_rpc_callback 
320 ( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
321 {
322   vnet_main_t * vm = vnet_get_main();
323   ASSERT(os_get_cpu_number() == 0);
324
325   if (a->is_remove)
326     vnet_arp_unset_ip4_over_ethernet_internal(vm, 
327                                               a->sw_if_index, 
328                                               a->fib_index,
329                                               &(a->a));
330   else
331     vnet_arp_set_ip4_over_ethernet_internal (vm,
332                                              a->sw_if_index,
333                                              a->fib_index,
334                                              &(a->a),
335                                              a->is_static);
336 }
337
338 int
339 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
340                                 u32 sw_if_index,
341                                 u32 fib_index,
342                                 void * a_arg,
343                                 int is_static)
344 {
345   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
346   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
347
348   args.sw_if_index = sw_if_index;
349   args.fib_index = fib_index;
350   args.is_static = is_static;
351   args.is_remove = 0;
352   memcpy (&args.a, a, sizeof (*a));
353
354   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
355                                (u8 *) &args, sizeof (args));
356   return 0;
357 }
358
359 int
360 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
361                                          u32 sw_if_index,
362                                          u32 fib_index,
363                                          void * a_arg,
364                                          int is_static)
365 {
366   ethernet_arp_ip4_key_t k;
367   ethernet_arp_ip4_entry_t * e = 0;
368   ethernet_arp_main_t * am = &ethernet_arp_main;
369   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
370   vlib_main_t * vm = vlib_get_main();
371   ip4_main_t * im = &ip4_main;
372   ip_lookup_main_t * lm = &im->lookup_main;
373   int make_new_arp_cache_entry=1;
374   uword * p;
375   ip4_add_del_route_args_t args;
376   ip_adjacency_t adj, * existing_adj;
377   pending_resolution_t * pr, * mc;
378   
379   u32 next_index;
380   u32 adj_index;
381
382   fib_index = (fib_index != (u32)~0) 
383     ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
384
385   k.sw_if_index = sw_if_index;
386   k.ip4_address = a->ip4;
387   k.fib_index = fib_index;
388
389   p = mhash_get (&am->ip4_entry_by_key, &k);
390   if (p)
391     {
392       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
393
394       /* Refuse to over-write static arp. */
395       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
396         return -2;
397       make_new_arp_cache_entry = 0;
398     }
399
400   /* Note: always install the route. It might have been deleted */
401   memset(&adj, 0, sizeof(adj));
402   adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
403
404   vnet_rewrite_for_sw_interface
405     (vnm,
406      VNET_L3_PACKET_TYPE_IP4,
407      sw_if_index,
408      ip4_rewrite_node.index,
409      a->ethernet,               /* destination address */
410      &adj.rewrite_header,
411      sizeof (adj.rewrite_data));
412
413   /* result of this lookup should be next-hop adjacency */
414   adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0);
415   existing_adj = ip_get_adjacency(lm, adj_index);
416
417   if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
418       existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
419     {
420       u32 * ai;
421       u32 * adjs = vec_dup(e->adjacencies);
422       /* Update all adj assigned to this arp entry */
423       vec_foreach(ai, adjs)
424         {
425           int i;
426           ip_adjacency_t * uadj = ip_get_adjacency(lm, *ai);
427           for (i = 0; i < uadj->n_adj; i++)
428             if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
429                 uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
430               ip_update_adjacency (lm, *ai + i, &adj);
431         }
432       vec_free(adjs);
433     }
434   else
435     {
436       /* create new adj */
437       args.table_index_or_table_id = fib_index;
438       args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
439       args.dst_address = a->ip4;
440       args.dst_address_length = 32;
441       args.adj_index = ~0;
442       args.add_adj = &adj;
443       args.n_add_adj = 1;
444       ip4_add_del_route (im, &args);
445     }
446
447   if (make_new_arp_cache_entry)
448     {
449       pool_get (am->ip4_entry_pool, e);
450       mhash_set (&am->ip4_entry_by_key, &k,
451                  e - am->ip4_entry_pool,
452                  /* old value */ 0);
453       e->key = k;
454     }
455
456   /* Update time stamp and ethernet address. */
457   memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address));
458   e->cpu_time_last_updated = clib_cpu_time_now ();
459   if (is_static)
460     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
461
462   /* Customer(s) waiting for this address to be resolved? */
463   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
464   if (p)
465     {
466       next_index = p[0];
467
468       while (next_index != (u32)~0)
469         {
470           pr = pool_elt_at_index (am->pending_resolutions, next_index);
471           vlib_process_signal_event (vm, pr->node_index,
472                                      pr->type_opaque, 
473                                      pr->data);
474           next_index = pr->next_index;
475           pool_put (am->pending_resolutions, pr);
476         }
477       
478       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
479     }
480
481   /* Customer(s) requesting ARP event for this address? */
482   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
483   if (p)
484     {
485       next_index = p[0];
486
487       while (next_index != (u32)~0)
488         {
489           int (*fp)(u32, u8 *, u32, u32);
490           int rv = 1;
491           mc = pool_elt_at_index (am->mac_changes, next_index);
492           fp = mc->data_callback;
493
494           /* Call the user's data callback, return 1 to suppress dup events */
495           if (fp)
496             rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0);
497           
498           /* 
499            * Signal the resolver process, as long as the user
500            * says they want to be notified
501            */
502           if (rv == 0)
503             vlib_process_signal_event (vm, mc->node_index,
504                                        mc->type_opaque, 
505                                        mc->data);
506           next_index = mc->next_index;
507         }
508     }
509
510   return 0;
511 }
512
513 void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, 
514                                              void * address_arg,
515                                              uword node_index,
516                                              uword type_opaque,
517                                              uword data)
518 {
519   ethernet_arp_main_t * am = &ethernet_arp_main;
520   ip4_address_t * address = address_arg;
521   uword * p;
522   pending_resolution_t * pr;
523   
524   pool_get (am->pending_resolutions, pr);
525
526   pr->next_index = ~0;
527   pr->node_index = node_index;
528   pr->type_opaque = type_opaque;
529   pr->data = data;
530   pr->data_callback = 0;
531
532   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
533   if (p)
534     {
535       /* Insert new resolution at the head of the list */
536       pr->next_index = p[0];
537       hash_unset (am->pending_resolutions_by_address, address->as_u32);
538     }
539   
540   hash_set (am->pending_resolutions_by_address, address->as_u32, 
541             pr - am->pending_resolutions);
542 }
543
544 int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, 
545                                        void * data_callback,
546                                        u32 pid,
547                                        void * address_arg,
548                                        uword node_index,
549                                        uword type_opaque,
550                                        uword data, int is_add)
551 {
552   ethernet_arp_main_t * am = &ethernet_arp_main;
553   ip4_address_t * address = address_arg;
554   uword * p;
555   pending_resolution_t * mc;
556   void (*fp)(u32, u8 *) = data_callback;
557   
558   if (is_add)
559     {
560       pool_get (am->mac_changes, mc);
561
562       mc->next_index = ~0;
563       mc->node_index = node_index;
564       mc->type_opaque = type_opaque;
565       mc->data = data;
566       mc->data_callback = data_callback;
567       mc->pid = pid;
568       
569       p = hash_get (am->mac_changes_by_address, address->as_u32);
570       if (p)
571         {
572           /* Insert new resolution at the head of the list */
573           mc->next_index = p[0];
574           hash_unset (am->mac_changes_by_address, address->as_u32);
575         }
576       
577       hash_set (am->mac_changes_by_address, address->as_u32, 
578                 mc - am->mac_changes);
579       return 0;
580     }
581   else
582     {
583       u32 index;
584       pending_resolution_t * mc_last = 0;
585
586       p = hash_get (am->mac_changes_by_address, address->as_u32);
587       if (p == 0)
588         return VNET_API_ERROR_NO_SUCH_ENTRY;
589
590       index = p[0];
591
592       while (index != (u32)~0)
593         {
594           mc = pool_elt_at_index (am->mac_changes, index);
595           if (mc->node_index == node_index &&
596               mc->type_opaque == type_opaque &&
597               mc->pid == pid)
598             {
599               /* Clients may need to clean up pool entries, too */
600               if (fp)
601                 (*fp)(mc->data, 0 /* no new mac addrs */);
602               if (index == p[0])
603                 {
604                   hash_unset (am->mac_changes_by_address, address->as_u32);
605                   if (mc->next_index != ~0)
606                     hash_set (am->mac_changes_by_address, address->as_u32,
607                               mc->next_index);
608                   pool_put (am->mac_changes, mc);
609                   return 0;
610                 }
611               else
612                 {
613                   ASSERT(mc_last);
614                   mc_last->next_index = mc->next_index;
615                   pool_put (am->mac_changes, mc);
616                   return 0;
617                 }
618             }
619           mc_last = mc;
620           index = mc->next_index;
621         }
622       
623       return VNET_API_ERROR_NO_SUCH_ENTRY;
624     }
625 }
626
/* Statically known next indices of the ARP input node.  Only "drop" is
   listed here; arp_unnumbered() takes the next index used for sending from
   arp_input_next_index_by_hw_if_index, a per-hardware-interface table.
   ARP_INPUT_N_NEXT is the number of static entries. */
typedef enum {
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
632
/* Table of ARP input error/counter codes as (symbol, description) pairs.
   Expanded with a local definition of _() to generate the enum below.
   Order matters: replies_sent is first and therefore has value 0, which
   arp_input relies on when it tests "if (error0)" to decide whether a
   packet failed the header checks. */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \

/* ETHERNET_ARP_ERROR_<symbol> for every table row, followed by the count. */
typedef enum {
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
  ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
654
655 /* get first interface address */
656 ip4_address_t *
657 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
658                              ip_interface_address_t ** result_ia)
659 {
660   ip_lookup_main_t * lm = &im->lookup_main;
661   ip_interface_address_t * ia = 0;
662   ip4_address_t * result = 0;
663
664   foreach_ip_interface_address (lm, ia, sw_if_index, 
665                                 1 /* honor unnumbered */,
666   ({
667     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
668     result = a;
669     break;
670   }));
671   if (result_ia)
672     *result_ia = result ? ia : 0;
673   return result;
674 }
675
676 static void unset_random_arp_entry (void)
677 {
678   ethernet_arp_main_t * am = &ethernet_arp_main;
679   ethernet_arp_ip4_entry_t * e;
680   vnet_main_t * vnm = vnet_get_main();
681   ethernet_arp_ip4_over_ethernet_address_t delme;  
682   u32 index;
683
684   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
685   am->arp_delete_rotor = index;
686
687   /* Try again from elt 0, could happen if an intfc goes down */
688   if (index == ~0)
689     {
690       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
691       am->arp_delete_rotor = index;
692     }
693
694   /* Nothing left in the pool */
695   if (index == ~0)
696     return;
697
698   e = pool_elt_at_index (am->ip4_entry_pool, index);
699   
700   memcpy (&delme.ethernet, e->ethernet_address, 6);
701   delme.ip4.as_u32 = e->key.ip4_address.as_u32;
702   
703   vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
704                                     e->key.fib_index, &delme);
705 }
706   
/* Sends a (partially built) ARP packet out of every interface that is
   unnumbered with respect to the interface owning the matched address.

   p0    the original packet buffer; its current data is the ARP header
   pi0   buffer index of p0
   eth0  ethernet header of the packet; only its destination MAC is read
         before the pointer is reused for the headers built here
   ifa0  interface address that matched; its sw_if_index identifies the
         interface that lends its address

   The original buffer is prepared for the first such interface and left for
   the caller to enqueue; one copy per additional interface is built here
   and sent straight to that interface's output node.

   Returns the next index the caller should use for the original packet,
   taken from arp_input_next_index_by_hw_if_index for the first
   interface. */
static u32 arp_unnumbered (vlib_buffer_t * p0, 
                           u32 pi0,
                           ethernet_header_t * eth0,
                           ip_interface_address_t * ifa0)
{
  ethernet_arp_main_t * am = &ethernet_arp_main;
  vlib_main_t * vm = vlib_get_main();
  vnet_main_t * vnm = vnet_get_main();
  vnet_interface_main_t * vim = &vnm->interface_main;
  vnet_sw_interface_t * si;
  vnet_hw_interface_t * hi;
  u32 unnum_src_sw_if_index;
  u32 * broadcast_swifs = 0;
  u32 * buffers = 0;
  u32 n_alloc = 0;
  vlib_buffer_t * b0;
  int i;
  u8 dst_mac_address[6];
  i16 header_size;
  ethernet_arp_header_t * arp0;

  /* Save the dst mac address */
  memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));

  /* Figure out which sw_if_index supplied the address */
  unnum_src_sw_if_index = ifa0->sw_if_index;

  /* Track down all users of the unnumbered source */
  pool_foreach (si, vim->sw_interfaces, 
  ({
    if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
        (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
      {
        vec_add1 (broadcast_swifs, si->sw_if_index);
      }
  }));
          

  /* The code below indexes broadcast_swifs[0] unconditionally */
  ASSERT (vec_len(broadcast_swifs));

  /* Allocate buffering if we need it */
  if (vec_len(broadcast_swifs) > 1)
    {
      /* One extra buffer per interface beyond the first */
      vec_validate (buffers, vec_len(broadcast_swifs)-2);
      n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers));
      /* Keep only the buffers actually obtained; if the allocation came up
         short, the trailing interfaces get no copy */
      _vec_len (buffers) = n_alloc;
      for (i = 0; i < n_alloc; i++)
        {
          b0 = vlib_get_buffer (vm, buffers[i]);

          /* xerox (partially built) ARP pkt */
          memcpy (b0->data, p0->data, p0->current_length + p0->current_data);
          b0->current_data = p0->current_data;
          b0->current_length = p0->current_length;
          vnet_buffer(b0)->sw_if_index[VLIB_RX] =
            vnet_buffer(p0)->sw_if_index[VLIB_RX];
        }
    }

  /* Put the original packet in slot 0 so buffers[i] pairs up with
     broadcast_swifs[i] */
  vec_insert (buffers, 1, 0);
  buffers[0] = pi0;
  
  for (i = 0; i < vec_len(buffers); i++)
    {
      b0 = vlib_get_buffer(vm, buffers[i]);
      arp0 = vlib_buffer_get_current (b0);

      hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
      si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);

      /* For decoration, most likely */
      vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;

      /* Fix ARP pkt src address */
      memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);

      /* Build L2 encaps for this swif */
      header_size = sizeof (ethernet_header_t);
      /* 4 bytes per VLAN tag carried by the sub-interface */
      if (si->sub.eth.flags.one_tag) 
        header_size += 4;
      else if (si->sub.eth.flags.two_tags)
        header_size += 8;
      
      /* Move the buffer start back to make room for the L2 header */
      vlib_buffer_advance (b0, -header_size);
      eth0 = vlib_buffer_get_current (b0);

      if (si->sub.eth.flags.one_tag) {
        /* Single tag: ethernet type names the tag, the tag carries ARP */
        ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
        
        eth0->type = si->sub.eth.flags.dot1ad ?
          clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
          clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        outer->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
        outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
        
      } else if (si->sub.eth.flags.two_tags) {
        /* Two tags: outer tag announces a VLAN inner tag, which carries
           ARP */
        ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
        ethernet_vlan_header_t * inner = (void *) (outer + 1);
        
        eth0->type = si->sub.eth.flags.dot1ad ?
          clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
          clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        outer->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
        outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        inner->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
        inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
        
      } else {
        /* Untagged */
        eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
      }
      
      /* Restore the original dst address, set src address */
      memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address));
      memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address));
      
      /* Transmit replicas */
      if (i > 0)
        {
          /* Each copy goes out in its own single-buffer frame */
          vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
          u32 * to_next = vlib_frame_vector_args (f);
          to_next[0] = buffers[i];
          f->n_vectors = 1;
          vlib_put_frame_to_node (vm, hi->output_node_index, f);
        }
    }

  /* Look this up before broadcast_swifs is freed below */
  hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[0]);

  vec_free (broadcast_swifs);
  vec_free (buffers);

  /* The regular path outputs the original pkt.. */
  return vec_elt (am->arp_input_next_index_by_hw_if_index, hi->hw_if_index);
}
844
845 static uword
846 arp_input (vlib_main_t * vm,
847            vlib_node_runtime_t * node,
848            vlib_frame_t * frame)
849 {
850   ethernet_arp_main_t * am = &ethernet_arp_main;
851   vnet_main_t * vnm = vnet_get_main();
852   ip4_main_t * im4 = &ip4_main;
853   u32 n_left_from, next_index, * from, * to_next;
854   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
855
856   from = vlib_frame_vector_args (frame);
857   n_left_from = frame->n_vectors;
858   next_index = node->cached_next_index;
859
860   if (node->flags & VLIB_NODE_FLAG_TRACE)
861     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
862                                    /* stride */ 1,
863                                    sizeof (ethernet_arp_input_trace_t));
864
865   while (n_left_from > 0)
866     {
867       u32 n_left_to_next;
868
869       vlib_get_next_frame (vm, node, next_index,
870                            to_next, n_left_to_next);
871
872       while (n_left_from > 0 && n_left_to_next > 0)
873         {
874           vlib_buffer_t * p0;
875           vnet_hw_interface_t * hw_if0;
876           ethernet_arp_header_t * arp0;
877           ethernet_header_t * eth0;
878           ip_interface_address_t * ifa0;
879           ip_adjacency_t * adj0;
880           ip4_address_t * if_addr0;
881           ip4_address_t proxy_src;
882           u32 pi0, error0, next0, sw_if_index0;
883           u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
884           ethernet_proxy_arp_t * pa;
885
886           pi0 = from[0];
887           to_next[0] = pi0;
888           from += 1;
889           to_next += 1;
890           n_left_from -= 1;
891           n_left_to_next -= 1;
892
893           p0 = vlib_get_buffer (vm, pi0);
894           arp0 = vlib_buffer_get_current (p0);
895
896           is_request0 = arp0->opcode 
897               == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
898
899           error0 = ETHERNET_ARP_ERROR_replies_sent;
900
901           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
902                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
903                     : error0);
904           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
905                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
906                     : error0);
907
908           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
909
910           if (error0)
911             goto drop1;
912
913           /* Check that IP address is local and matches incoming interface. */
914           if_addr0 = ip4_interface_address_matching_destination (im4,
915                                                                  &arp0->ip4_over_ethernet[1].ip4,
916                                                                  sw_if_index0,
917                                                                  &ifa0);
918           if (! if_addr0)
919             {
920               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
921               goto drop1;
922             }
923
924           /* Honor unnumbered interface, if any */
925           is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
926
927           /* Source must also be local to subnet of matching interface address. */
928           if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
929             {
930               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
931               goto drop1;
932             }
933
934           /* Reject requests/replies with our local interface address. */
935           src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
936           if (src_is_local0)
937             {
938               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
939               goto drop1;
940             }
941
942           dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
943
944           /* Fill in ethernet header. */
945           eth0 = ethernet_buffer_get_header (p0);
946
947           /* Trash ARP packets whose ARP-level source addresses do not
948              match their L2-frame-level source addresses */
949           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
950                       sizeof (eth0->src_address)))
951             {
952               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
953               goto drop2;
954             }
955
956           /* Learn or update sender's mapping only for requests or unicasts
957              that don't match local interface address. */
958           if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST
959               || is_request0)
960             {
961               if (am->limit_arp_cache_size && 
962                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
963                 unset_random_arp_entry();
964
965               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, 
966                                               (u32)~0 /* default fib */,
967                                               &arp0->ip4_over_ethernet[0], 
968                                               0 /* is_static */);
969               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
970              }
971
972           /* Only send a reply for requests sent which match a local interface. */
973           if (! (is_request0 && dst_is_local0))
974             {
975               error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)
976                         ? ETHERNET_ARP_ERROR_replies_received : error0);
977               goto drop1;
978             }
979
980           /* Send a reply. */
981         send_reply:
982           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
983           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
984
985           /* Can happen in a multi-core env. */
986           if (PREDICT_FALSE(hw_if0->hw_if_index >= vec_len (am->arp_input_next_index_by_hw_if_index)))
987             {
988               error0 = ETHERNET_ARP_ERROR_missing_interface_address;
989               goto drop2;
990             }
991
992           next0 = vec_elt (am->arp_input_next_index_by_hw_if_index, hw_if0->hw_if_index);
993
994           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
995
996           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
997
998           memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6);
999           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32;
1000
1001           /* Hardware must be ethernet-like. */
1002           ASSERT (vec_len (hw_if0->hw_address) == 6);
1003
1004           memcpy (eth0->dst_address, eth0->src_address, 6);
1005           memcpy (eth0->src_address, hw_if0->hw_address, 6);
1006
1007           /* Figure out how much to rewind current data from adjacency. */
1008           if (ifa0)
1009             {
1010               adj0 = ip_get_adjacency (&ip4_main.lookup_main, 
1011                                        ifa0->neighbor_probe_adj_index);
1012               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1013                 {
1014                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1015                   goto drop2;
1016                 }
1017               if (is_unnum0)
1018                 next0 = arp_unnumbered (p0, pi0, eth0, ifa0);
1019               else
1020                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1021             }
1022
1023           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1024                                            n_left_to_next,pi0,next0);
1025
1026           n_replies_sent += 1;
1027           continue;
1028
1029         drop1:
1030           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1031               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1032                arp0->ip4_over_ethernet[1].ip4.as_u32))
1033             {
1034               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1035               goto drop2;
1036             }
1037           /* See if proxy arp is configured for the address */
1038           if (is_request0) 
1039             {
1040               vnet_sw_interface_t * si;
1041               u32 this_addr = clib_net_to_host_u32 
1042                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1043               u32 fib_index0;
1044
1045               si = vnet_get_sw_interface (vnm, sw_if_index0);
1046               
1047               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1048                 goto drop2;
1049
1050               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, 
1051                                     sw_if_index0);
1052
1053               vec_foreach (pa, am->proxy_arps)
1054                 {
1055                   u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1056                   u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1057
1058                    /* an ARP request hit in the proxy-arp table? */
1059                    if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1060                        (fib_index0 == pa->fib_index))
1061                     {
1062                       eth0 = ethernet_buffer_get_header (p0);
1063                       proxy_src.as_u32 = 
1064                         arp0->ip4_over_ethernet[1].ip4.data_u32;
1065
1066                       /* 
1067                        * Rewind buffer, direct code above not to
1068                        * think too hard about it. 
1069                        * $$$ is the answer ever anything other than
1070                        * vlib_buffer_reset(..)?
1071                        */
1072                       ifa0 = 0;
1073                       if_addr0 = &proxy_src;
1074                       vlib_buffer_reset (p0);
1075                       n_proxy_arp_replies_sent++;
1076                       goto send_reply;
1077                     }
1078                 }
1079             }
1080           
1081         drop2:
1082
1083           next0 = ARP_INPUT_NEXT_DROP;
1084           p0->error = node->errors[error0];
1085
1086           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1087                                            n_left_to_next,pi0,next0);
1088         }
1089
1090       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1091     }
1092
1093   vlib_error_count (vm, node->node_index,
1094                     ETHERNET_ARP_ERROR_replies_sent, 
1095                     n_replies_sent - n_proxy_arp_replies_sent);
1096   
1097   vlib_error_count (vm, node->node_index,
1098                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent, 
1099                     n_proxy_arp_replies_sent);
1100   return frame->n_vectors;
1101 }
1102
1103 static char * ethernet_arp_error_strings[] = {
1104 #define _(sym,string) string,
1105   foreach_ethernet_arp_error
1106 #undef _
1107 };
1108
1109 VLIB_REGISTER_NODE (arp_input_node,static) = {
1110   .function = arp_input,
1111   .name = "arp-input",
1112   .vector_size = sizeof (u32),
1113
1114   .n_errors = ETHERNET_ARP_N_ERROR,
1115   .error_strings = ethernet_arp_error_strings,
1116
1117   .n_next_nodes = ARP_INPUT_N_NEXT,
1118   .next_nodes = {
1119     [ARP_INPUT_NEXT_DROP] = "error-drop",
1120   },
1121
1122   .format_buffer = format_ethernet_arp_header,
1123   .format_trace = format_ethernet_arp_input_trace,
1124 };
1125
1126 clib_error_t *
1127 ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
1128                                         u32 hw_if_index,
1129                                         u32 flags)
1130 {
1131   ethernet_arp_main_t * am = &ethernet_arp_main;
1132   vnet_hw_interface_t * hw_if;
1133
1134   hw_if = vnet_get_hw_interface (vnm, hw_if_index);
1135
1136   /* Fill in lookup tables with default table (0). */
1137   vec_validate_init_empty (am->arp_input_next_index_by_hw_if_index, hw_if_index, ~0);
1138   am->arp_input_next_index_by_hw_if_index[hw_if_index]
1139     = vlib_node_add_next (vnm->vlib_main, arp_input_node.index, hw_if->output_node_index);
1140
1141   return 0;
1142 }
1143
1144 VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (ethernet_arp_hw_interface_link_up_down);
1145
1146 static int
1147 ip4_arp_entry_sort (void *a1, void *a2)
1148 {
1149   ethernet_arp_ip4_entry_t * e1 = a1;
1150   ethernet_arp_ip4_entry_t * e2 = a2;
1151
1152   int cmp;
1153   vnet_main_t * vnm = vnet_get_main();
1154
1155   cmp = vnet_sw_interface_compare 
1156     (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
1157   if (! cmp)
1158     cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
1159   return cmp;
1160 }
1161
1162 static clib_error_t *
1163 show_ip4_arp (vlib_main_t * vm,
1164               unformat_input_t * input,
1165               vlib_cli_command_t * cmd)
1166 {
1167   vnet_main_t * vnm = vnet_get_main();
1168   ethernet_arp_main_t * am = &ethernet_arp_main;
1169   ethernet_arp_ip4_entry_t * e, * es;
1170   ethernet_proxy_arp_t * pa;
1171   clib_error_t * error = 0;
1172   u32 sw_if_index;
1173
1174   /* Filter entries by interface if given. */
1175   sw_if_index = ~0;
1176   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1177
1178   es = 0;
1179   pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); }));
1180   if ( es )
1181     {
1182       vec_sort_with_function (es, ip4_arp_entry_sort);
1183       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1184       vec_foreach (e, es) {
1185         if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
1186           continue;
1187         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1188       }
1189       vec_free (es);
1190     }
1191
1192   if (vec_len (am->proxy_arps))
1193     {
1194       vlib_cli_output (vm, "Proxy arps enabled for:");
1195       vec_foreach(pa, am->proxy_arps)
1196         {
1197           vlib_cli_output (vm, "Fib_index %d   %U - %U ", 
1198                            pa->fib_index,
1199                            format_ip4_address, &pa->lo_addr, 
1200                            format_ip4_address, &pa->hi_addr);
1201         }
1202     }
1203       
1204   return error;
1205 }
1206
1207 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1208   .path = "show ip arp",
1209   .function = show_ip4_arp,
1210   .short_help = "Show ARP table",
1211 };
1212
1213 typedef struct {
1214   pg_edit_t l2_type, l3_type;
1215   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1216   pg_edit_t opcode;
1217   struct {
1218     pg_edit_t ethernet;
1219     pg_edit_t ip4;
1220   } ip4_over_ethernet[2];
1221 } pg_ethernet_arp_header_t;
1222
1223 static inline void
1224 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1225 {
1226   /* Initialize fields that are not bit fields in the IP header. */
1227 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1228   _ (l2_type);
1229   _ (l3_type);
1230   _ (n_l2_address_bytes);
1231   _ (n_l3_address_bytes);
1232   _ (opcode);
1233   _ (ip4_over_ethernet[0].ethernet);
1234   _ (ip4_over_ethernet[0].ip4);
1235   _ (ip4_over_ethernet[1].ethernet);
1236   _ (ip4_over_ethernet[1].ip4);
1237 #undef _
1238 }
1239
1240 uword
1241 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1242 {
1243   pg_stream_t * s = va_arg (*args, pg_stream_t *);
1244   pg_ethernet_arp_header_t * p;
1245   u32 group_index;
1246   
1247   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1248                             &group_index);
1249   pg_ethernet_arp_header_init (p);
1250
1251   /* Defaults. */
1252   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1253   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1254   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1255   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1256
1257   if (! unformat (input, "%U: %U/%U -> %U/%U",
1258                   unformat_pg_edit,
1259                   unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1260                   unformat_pg_edit,
1261                   unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1262                   unformat_pg_edit,
1263                   unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1264                   unformat_pg_edit,
1265                   unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1266                   unformat_pg_edit,
1267                   unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1268     {
1269       /* Free up any edits we may have added. */
1270       pg_free_edit_group (s);
1271       return 0;
1272     }
1273   return 1;
1274 }
1275
1276 clib_error_t *ip4_set_arp_limit (u32 arp_limit)
1277 {
1278   ethernet_arp_main_t * am = &ethernet_arp_main;
1279
1280   am->limit_arp_cache_size = arp_limit;
1281   return 0;
1282 }
1283
1284 static void
1285 arp_ip4_entry_del_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1286 {
1287   int done = 0;
1288   int i;
1289
1290   while (!done)
1291     {
1292       vec_foreach_index(i, e->adjacencies)
1293         if (vec_elt(e->adjacencies, i) == adj_index)
1294           {
1295             vec_del1(e->adjacencies, i);
1296             continue;
1297           }
1298       done = 1;
1299     }
1300 }
1301
1302 static void
1303 arp_ip4_entry_add_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1304 {
1305   int i;
1306   vec_foreach_index(i, e->adjacencies)
1307     if (vec_elt(e->adjacencies, i) == adj_index)
1308       return;
1309   vec_add1(e->adjacencies, adj_index);
1310 }
1311
1312 static void
1313 arp_add_del_adj_cb (struct ip_lookup_main_t * lm,
1314                     u32 adj_index,
1315                     ip_adjacency_t * adj,
1316                     u32 is_del)
1317 {
1318   ethernet_arp_main_t * am = &ethernet_arp_main;
1319   ip4_main_t * im = &ip4_main;
1320   ethernet_arp_ip4_key_t k;
1321   ethernet_arp_ip4_entry_t * e = 0;
1322   uword * p;
1323   u32 ai;
1324
1325   for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++)
1326     {
1327       adj = ip_get_adjacency (lm, ai);
1328       if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && adj->arp.next_hop.ip4.as_u32)
1329         {
1330           k.sw_if_index = adj->rewrite_header.sw_if_index;
1331           k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32;
1332           k.fib_index = im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index];
1333           p = mhash_get (&am->ip4_entry_by_key, &k);
1334           if (p)
1335             e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1336         }
1337       else
1338         continue;
1339
1340       if (is_del)
1341         {
1342           if (!e)
1343             clib_warning("Adjacency contains unknown ARP next hop %U (del)",
1344                          format_ip4_address, &adj->arp.next_hop);
1345           else
1346             arp_ip4_entry_del_adj(e, adj->heap_handle);
1347         }
1348       else /* add */
1349         {
1350           if (!e)
1351             clib_warning("Adjacency contains unknown ARP next hop %U (add)",
1352                          format_ip4_address, &adj->arp.next_hop);
1353           else
1354             arp_ip4_entry_add_adj(e, adj->heap_handle);
1355         }
1356     }
1357 }
1358
1359 static clib_error_t * ethernet_arp_init (vlib_main_t * vm)
1360 {
1361   ethernet_arp_main_t * am = &ethernet_arp_main;
1362   pg_node_t * pn;
1363   clib_error_t * error;
1364   ip4_main_t * im = &ip4_main;
1365   ip_lookup_main_t * lm = &im->lookup_main;
1366
1367   if ((error = vlib_call_init_function (vm, ethernet_init)))
1368     return error;
1369
1370   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1371
1372   pn = pg_get_node (arp_input_node.index);
1373   pn->unformat_edit = unformat_pg_arp_header;
1374
1375   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1376 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1377   foreach_ethernet_arp_opcode;
1378 #undef _
1379
1380   mhash_init (&am->ip4_entry_by_key,
1381               /* value size */ sizeof (uword),
1382               /* key size */ sizeof (ethernet_arp_ip4_key_t));
1383
1384   /* $$$ configurable */
1385   am->limit_arp_cache_size = 50000;
1386
1387   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1388   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1389
1390   /* don't trace ARP error packets */
1391   {
1392     vlib_node_runtime_t *rt = 
1393       vlib_node_get_runtime (vm, arp_input_node.index);
1394
1395 #define _(a,b)                                  \
1396     vnet_pcap_drop_trace_filter_add_del         \
1397         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1398          1 /* is_add */);
1399     foreach_ethernet_arp_error
1400 #undef _
1401   }
1402
1403   ip_register_add_del_adjacency_callback(lm, arp_add_del_adj_cb);
1404
1405   return 0;
1406 }
1407
1408 VLIB_INIT_FUNCTION (ethernet_arp_init);
1409
1410 int 
1411 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1412                                   u32 sw_if_index, u32 fib_index,
1413                                   void * a_arg)
1414 {
1415   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1416   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1417
1418   args.sw_if_index = sw_if_index;
1419   args.fib_index = fib_index;
1420   args.is_remove = 1;
1421   memcpy (&args.a, a, sizeof (*a));
1422
1423   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
1424                                (u8 *) &args, sizeof (args));
1425   return 0;
1426 }
1427
1428 static inline int 
1429 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1430                                            u32 sw_if_index, 
1431                                            u32 fib_index,
1432                                            void * a_arg)
1433 {
1434   ethernet_arp_ip4_entry_t * e;
1435   ethernet_arp_main_t * am = &ethernet_arp_main;
1436   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1437   ethernet_arp_ip4_key_t k;
1438   uword * p;
1439   ip4_add_del_route_args_t args;
1440   ip4_main_t * im = &ip4_main;
1441   ip_lookup_main_t * lm = &im->lookup_main;
1442   u32 adj_index;
1443   ip_adjacency_t * adj;
1444
1445   k.sw_if_index = sw_if_index;
1446   k.ip4_address = a->ip4;
1447   k.fib_index = fib_index;
1448   p = mhash_get (&am->ip4_entry_by_key, &k);
1449   if (! p)
1450     return -1;
1451
1452   memset(&args, 0, sizeof(args));
1453
1454   /* 
1455    * Make sure that the route actually exists before we try to delete it,
1456    * and make sure that it's a rewrite adjacency.
1457    *
1458    * If we point 1-N unnumbered interfaces at a loopback interface and 
1459    * shut down the loopback before shutting down 1-N unnumbered 
1460    * interfaces, the ARP cache will still have an entry, 
1461    * but the route will have disappeared.
1462    * 
1463    * See also ip4_del_interface_routes (...) 
1464    *            -> ip4_delete_matching_routes (...).
1465    */
1466   
1467   adj_index = ip4_fib_lookup_with_table 
1468       (im, fib_index, &a->ip4, 1 /* disable default route */);
1469
1470   /* Miss adj? Forget it... */
1471   if (adj_index != lm->miss_adj_index) {
1472       adj = ip_get_adjacency (lm, adj_index);
1473       /* 
1474        * Stupid control-plane trick:
1475        * admin down an interface (removes arp routes from fib),
1476        * bring the interface back up (does not reinstall them)
1477        * then remove the arp cache entry (yuck). When that happens,
1478        * the adj we find here will be the interface subnet ARP adj.
1479        */
1480       if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) {
1481           args.table_index_or_table_id = fib_index;
1482           args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL 
1483               | IP4_ROUTE_FLAG_NEIGHBOR;
1484           args.dst_address = a->ip4;
1485           args.dst_address_length = 32;
1486           ip4_add_del_route (im, &args);
1487           ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
1488       }
1489   }
1490
1491   e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1492   mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
1493   pool_put (am->ip4_entry_pool, e);
1494   return 0;
1495 }
1496
1497 static void 
1498 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a)
1499 {
1500   u8 old;
1501   int i;
1502
1503   for (i = 3; i >= 0; i--) 
1504     {
1505       old = a->ip4.as_u8[i];
1506       a->ip4.as_u8[i] += 1;
1507       if (old < a->ip4.as_u8[i])
1508         break;
1509     }
1510
1511   for (i = 5; i >= 0; i--)
1512     {
1513       old = a->ethernet[i];
1514       a->ethernet[i] += 1;
1515       if (old < a->ethernet[i])
1516         break;
1517     }
1518 }
1519
1520 int vnet_proxy_arp_add_del (ip4_address_t *lo_addr,
1521                             ip4_address_t *hi_addr,
1522                             u32 fib_index, int is_del)
1523 {
1524   ethernet_arp_main_t *am = &ethernet_arp_main;
1525   ethernet_proxy_arp_t *pa;
1526   u32 found_at_index = ~0;
1527
1528   vec_foreach (pa, am->proxy_arps)
1529     {
1530       if (pa->lo_addr == lo_addr->as_u32 
1531           && pa->hi_addr == hi_addr->as_u32
1532           && pa->fib_index == fib_index)
1533         {
1534           found_at_index = pa - am->proxy_arps;
1535           break;
1536         }
1537     }
1538
1539   if (found_at_index != ~0)
1540     {
1541       /* Delete, otherwise it's already in the table */
1542       if (is_del)
1543         vec_delete (am->proxy_arps, 1, found_at_index);
1544       return 0;
1545     }
1546   /* delete, no such entry */
1547   if (is_del)
1548     return VNET_API_ERROR_NO_SUCH_ENTRY;
1549
1550   /* add, not in table */
1551   vec_add2 (am->proxy_arps, pa, 1);
1552   pa->lo_addr = lo_addr->as_u32;
1553   pa->hi_addr = hi_addr->as_u32;
1554   pa->fib_index = fib_index;
1555   return 0;
1556 }
1557
1558 /*
1559  * Remove any proxy arp entries asdociated with the 
1560  * specificed fib.
1561  */
1562 int vnet_proxy_arp_fib_reset (u32 fib_id)
1563 {
1564   ip4_main_t * im = &ip4_main;
1565   ethernet_arp_main_t *am = &ethernet_arp_main;
1566   ethernet_proxy_arp_t *pa;
1567   u32 * entries_to_delete = 0;
1568   u32 fib_index;
1569   uword * p;
1570   int i;
1571
1572   p = hash_get (im->fib_index_by_table_id, fib_id);
1573   if (! p)
1574       return VNET_API_ERROR_NO_SUCH_ENTRY;
1575   fib_index = p[0];
1576
1577   vec_foreach (pa, am->proxy_arps)
1578     {
1579       if (pa->fib_index == fib_index)
1580         {
1581           vec_add1 (entries_to_delete, pa - am->proxy_arps);
1582         }
1583     }
1584
1585   for (i = 0; i < vec_len(entries_to_delete); i++)
1586     {
1587        vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1588     } 
1589
1590   vec_free (entries_to_delete);
1591
1592    return 0;
1593 }
1594
1595 u32
1596 vnet_arp_glean_add(u32 fib_index, void * next_hop_arg)
1597 {
1598   ethernet_arp_main_t * am = &ethernet_arp_main;
1599   ip4_main_t * im = &ip4_main;
1600   ip_lookup_main_t * lm = &im->lookup_main;
1601   ip4_address_t * next_hop = next_hop_arg;
1602   ip_adjacency_t add_adj, *adj;
1603   ip4_add_del_route_args_t args;
1604   ethernet_arp_ip4_entry_t * e;
1605   ethernet_arp_ip4_key_t k;
1606   u32 adj_index;
1607
1608   adj_index = ip4_fib_lookup_with_table(im, fib_index, next_hop, 0);
1609   adj = ip_get_adjacency(lm, adj_index);
1610
1611   if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1612     return ~0;
1613
1614   if (adj->arp.next_hop.ip4.as_u32 != 0)
1615     return adj_index;
1616
1617   k.sw_if_index = adj->rewrite_header.sw_if_index;
1618   k.fib_index = fib_index;
1619   k.ip4_address.as_u32 = next_hop->as_u32;
1620
1621   if (mhash_get (&am->ip4_entry_by_key, &k))
1622     return adj_index;
1623
1624   pool_get (am->ip4_entry_pool, e);
1625   mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, /* old value */ 0);
1626   e->key = k;
1627   e->cpu_time_last_updated = clib_cpu_time_now ();
1628   e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN;
1629
1630   memset(&args, 0, sizeof(args));
1631   memcpy(&add_adj, adj, sizeof(add_adj));
1632   add_adj.arp.next_hop.ip4.as_u32 = next_hop->as_u32; /* install neighbor /32 route */
1633   args.table_index_or_table_id = fib_index;
1634   args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD| IP4_ROUTE_FLAG_NEIGHBOR;
1635   args.dst_address.as_u32 = next_hop->as_u32;
1636   args.dst_address_length = 32;
1637   args.adj_index = ~0;
1638   args.add_adj = &add_adj;
1639   args.n_add_adj = 1;
1640   ip4_add_del_route (im, &args);
1641   return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
1642 }
1643
1644 static clib_error_t *
1645 ip_arp_add_del_command_fn (vlib_main_t * vm,
1646                  unformat_input_t * input,
1647                  vlib_cli_command_t * cmd)
1648 {
1649   vnet_main_t * vnm = vnet_get_main();
1650   u32 sw_if_index;
1651   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1652   int addr_valid = 0;
1653   int is_del = 0;
1654   int count = 1;
1655   u32 fib_index = 0;
1656   u32 fib_id;
1657   int is_static = 0;
1658   int is_proxy = 0;
1659
1660   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1661     {
1662       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1663       if (unformat (input, "%U %U %U",
1664                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1665                     unformat_ip4_address, &addr.ip4, 
1666                     unformat_ethernet_address, &addr.ethernet))
1667         addr_valid = 1;
1668
1669       else if (unformat (input, "delete") || unformat (input, "del"))
1670         is_del = 1;
1671
1672       else if (unformat (input, "static"))
1673         is_static = 1;
1674
1675       else if (unformat (input, "count %d", &count))
1676         ;
1677
1678       else if (unformat (input, "fib-id %d", &fib_id))
1679         {
1680           ip4_main_t * im = &ip4_main;
1681           uword * p = hash_get (im->fib_index_by_table_id, fib_id);
1682           if (! p)
1683             return clib_error_return (0, "fib ID %d doesn't exist\n",
1684                                       fib_id);
1685           fib_index = p[0];
1686         }
1687
1688       else if (unformat (input, "proxy %U - %U", 
1689                          unformat_ip4_address, &lo_addr.ip4, 
1690                          unformat_ip4_address, &hi_addr.ip4))
1691         is_proxy = 1;
1692       else
1693         break;
1694     }
1695   
1696   if (is_proxy)
1697     {
1698       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, 
1699                                      fib_index, is_del);
1700       return 0;
1701     }
1702
1703   if (addr_valid) 
1704     {
1705       int i;
1706
1707       for (i = 0; i < count; i++) 
1708         {
1709           if (is_del == 0) 
1710             {
1711               uword event_type, * event_data = 0;
1712
1713               /* Park the debug CLI until the arp entry is installed */
1714               vnet_register_ip4_arp_resolution_event 
1715                 (vnm, &addr.ip4, vlib_current_process(vm),
1716                  1 /* type */, 0 /* data */);
1717               
1718               vnet_arp_set_ip4_over_ethernet
1719                 (vnm, sw_if_index, fib_index, &addr, is_static);
1720               
1721               vlib_process_wait_for_event (vm);
1722               event_type = vlib_process_get_events (vm, &event_data);
1723               vec_reset_length(event_data);
1724               if (event_type != 1)
1725                 clib_warning ("event type %d unexpected", event_type);
1726             }
1727           else
1728             vnet_arp_unset_ip4_over_ethernet
1729                 (vnm, sw_if_index, fib_index, &addr);
1730
1731           increment_ip4_and_mac_address (&addr);
1732         }
1733     }
1734   else
1735     {
1736       return clib_error_return (0, "unknown input `%U'",
1737                                 format_unformat_error, input);
1738     }
1739   
1740   return 0;
1741 }
1742
1743 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1744     .path = "set ip arp",
1745     .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address>",
1746     .function = ip_arp_add_del_command_fn,
1747 };
1748
1749 static clib_error_t *
1750 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1751                               unformat_input_t * input,
1752                               vlib_cli_command_t * cmd)
1753 {
1754   vnet_main_t * vnm = vnet_get_main();
1755   u32 sw_if_index;
1756   vnet_sw_interface_t * si;
1757   int enable = 0;
1758   int intfc_set = 0;
1759
1760   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1761     {
1762       if (unformat (input, "%U", unformat_vnet_sw_interface, 
1763                     vnm, &sw_if_index))
1764         intfc_set = 1;
1765       else if (unformat (input, "enable") || unformat (input, "on"))
1766         enable = 1;
1767       else if (unformat (input, "disable") || unformat (input, "off"))
1768         enable = 0;
1769       else
1770         break;
1771     }
1772
1773   if (intfc_set == 0)
1774     return clib_error_return (0, "unknown input '%U'",
1775                               format_unformat_error, input);
1776
1777   si = vnet_get_sw_interface (vnm, sw_if_index);
1778   ASSERT(si);
1779   if (enable)
1780     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1781   else 
1782     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1783   
1784   return 0;
1785 }
1786
1787 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
1788     .path = "set interface proxy-arp",
1789     .short_help = "set interface proxy-arp <intfc> [enable|disable]",
1790     .function = set_int_proxy_arp_command_fn,
1791 };
1792
1793
1794 /*
1795  * ARP Termination in a L2 Bridge Domain based on an
1796  * IP4 to MAC hash table mac_by_ip4 for each BD.
1797  */
/* Next-node slots of the arp-term-l2bd graph node. */
typedef enum {
  ARP_TERM_NEXT_L2_OUTPUT = 0,  /* reply goes out through l2-output */
  ARP_TERM_NEXT_DROP,           /* packet is dropped with an error */
  ARP_TERM_N_NEXT,              /* number of next-node slots */
} arp_term_next_t;
1803
/*
 * Next-node index table for the L2 input feature chain as seen from the
 * arp-term-l2bd node.  Filled in by arp_term_init() through
 * feat_bitmap_init_next_nodes() and consulted by arp_term_l2bd() through
 * feat_bitmap_get_next_node_index().
 * NOTE(review): 32 entries presumably means one per bit of the u32
 * feature bitmap -- confirm against L2INPUT_N_FEAT.
 */
1804 u32 arp_term_next_node_index[32];
1805
/*
 * Node function of "arp-term-l2bd": ARP termination for L2 bridge domains.
 *
 * For every buffer in the frame:
 *  - anything that is not an ARP request is passed on to the next L2 input
 *    feature, after clearing the ARP_TERM bit in its feature bitmap;
 *  - requests whose hardware type is not ethernet, whose protocol type is
 *    not IP4, or whose ARP sender MAC differs from the ethernet source MAC
 *    are dropped;
 *  - listeners registered in mac_changes_by_address (key 0) are offered the
 *    sender's MAC / IP binding;
 *  - if the target IP is found in the bridge domain's mac_by_ip4 table the
 *    request is rewritten in place into a reply.  The reply is sent through
 *    l2-output on the receiving interface, or, when that interface's L2
 *    config has bvi set, through the L2 feature chain with FEAT_FWD set;
 *  - if the target IP is not found the request continues to the next L2
 *    input feature.
 *
 * The replies_sent counter is bumped once per frame by the number of
 * replies generated.  Returns the number of buffers in the frame.
 */
1806 static uword
1807 arp_term_l2bd (vlib_main_t * vm,
1808                vlib_node_runtime_t * node,
1809                vlib_frame_t * frame)
1810 {
1811   l2input_main_t * l2im = &l2input_main;
1812   u32 n_left_from, next_index, * from, * to_next;
1813   u32 n_replies_sent = 0;
  /* One-entry cache of the last bridge domain config looked up;
     ~0 means nothing has been cached yet. */
1814   u16 last_bd_index = ~0;
1815   l2_bridge_domain_t * last_bd_config = 0;
1816   l2_input_config_t * cfg0;
1817
1818   from = vlib_frame_vector_args (frame);
1819   n_left_from = frame->n_vectors;
1820   next_index = node->cached_next_index;
1821
1822   while (n_left_from > 0)
1823     {
1824       u32 n_left_to_next;
1825
1826       vlib_get_next_frame (vm, node, next_index,
1827                            to_next, n_left_to_next);
1828
1829       while (n_left_from > 0 && n_left_to_next > 0)
1830         {
1831           vlib_buffer_t * p0;
1832           ethernet_header_t * eth0;
1833           ethernet_arp_header_t * arp0;
1834           u8 * l3h0;
1835           u32 pi0, error0, next0, sw_if_index0;
1836           u16 ethertype0;
1837           u16 bd_index0;
1838           u32 ip0;
1839           u8 * macp0;
1840
          /* Speculatively place the buffer in the frame for next_index;
             every exit path below ends in
             vlib_validate_buffer_enqueue_x1 with the real next0. */
1841           pi0 = from[0];
1842           to_next[0] = pi0;
1843           from += 1;
1844           to_next += 1;
1845           n_left_from -= 1;
1846           n_left_to_next -= 1;
1847
          /* The L3 (ARP) header is l2_len bytes past the ethernet header;
             the ethertype is read from the two bytes just before it. */
1848           p0 = vlib_get_buffer (vm, pi0);
1849           eth0 = vlib_buffer_get_current (p0);
1850           l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len;
1851           ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2));
1852           arp0 = (ethernet_arp_header_t *) l3h0;
1853
          /* The trace record is a raw copy of the bytes starting at the
             L3 header, taken before any rewrite. */
1854           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
1855                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
1856             {
1857               u8 *t0 = vlib_add_trace (
1858                   vm, node, p0, sizeof(ethernet_arp_input_trace_t));
1859               memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t));
1860             }
1861
          /* Only ARP requests are handled here; everything else moves on
             to the next L2 input feature. */
1862           if (PREDICT_FALSE  (
1863             (ethertype0 != ETHERNET_TYPE_ARP) ||
1864             (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request))))
1865             goto next_l2_feature;
1866
          /* error0 starts as replies_sent and is replaced if a header
             check fails.  The host constants are converted with
             clib_net_to_host_u16 here, whereas the opcode check above
             uses clib_host_to_net_u16.
             NOTE(review): presumably equivalent, since a 16-bit byte
             swap is symmetric -- confirm. */
1867           error0 = ETHERNET_ARP_ERROR_replies_sent;
1868           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
1869                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
1870                     : error0);
1871           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
1872                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
1873                     : error0);
1874
1875           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1876
          /* NOTE(review): this test treats replies_sent as "no error",
             which only holds if ETHERNET_ARP_ERROR_replies_sent is 0 --
             confirm against the error enum. */
1877           if (error0)
1878             goto drop;
1879
1880           // Trash ARP packets whose ARP-level source addresses do not
1881           // match their L2-frame-level source addresses
1882           if (PREDICT_FALSE (
1883             memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1884                     sizeof (eth0->src_address))))
1885             {
1886               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1887               goto drop;
1888             }
1889
1890           // Check if anyone wants ARP request events for L2 BDs
1891           {
1892           pending_resolution_t * mc;
1893           ethernet_arp_main_t * am = &ethernet_arp_main;
          /* Key 0 holds the head of a list of listeners, linked through
             pending_resolution_t.next_index and terminated by ~0. */
1894           uword *p = hash_get (am->mac_changes_by_address, 0);
1895           if (p && (vnet_buffer(p0)->l2.shg == 0))
1896             { // Only SHG 0 interface which is more likely local
              /* This next_index is a list cursor that shadows the
                 function-level next_index inside this block only. */
1897               u32 next_index = p[0];
1898               while (next_index != (u32)~0)
1899                 {
1900                   int (*fp)(u32, u8 *, u32, u32);
                  /* rv stays 1 when there is no callback, so no event
                     is signalled in that case. */
1901                   int rv = 1;
1902                   mc = pool_elt_at_index (am->mac_changes, next_index);
1903                   fp = mc->data_callback;
1904                   // Call the callback, return 1 to suppress dup events
1905                   if (fp) rv = (*fp)(mc->data, 
1906                                      arp0->ip4_over_ethernet[0].ethernet, 
1907                                      sw_if_index0, 
1908                                      arp0->ip4_over_ethernet[0].ip4.as_u32);
1909                   // Signal the resolver process
1910                   if (rv == 0)
1911                     vlib_process_signal_event (vm, mc->node_index,
1912                                                mc->type_opaque, 
1913                                                mc->data);
1914                   next_index = mc->next_index;
1915                 }
1916             }
1917           }
1918
1919           // lookup BD mac_by_ip4 hash table for MAC entry
          /* ip0 is the target IP of the request, in network byte order
             as found in the packet. */
1920           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
1921           bd_index0 = vnet_buffer(p0)->l2.bd_index;
          /* Refresh the cached bridge domain config when the bridge
             domain changes or the cache is still empty. */
1922           if (PREDICT_FALSE (
1923             (bd_index0 != last_bd_index) || (last_bd_index == (u16) ~0)))
1924             {
1925               last_bd_index = bd_index0;
1926               last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0);
1927             }
          /* The hash result is used directly as a pointer to the 6 MAC
             bytes copied below. */
1928           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
1929
1930           if (PREDICT_FALSE(!macp0)) 
1931               goto next_l2_feature;     // MAC not found 
1932
1933           // MAC found, send ARP reply -
1934           // Convert ARP request packet to ARP reply
          /* The original sender becomes the target, and the looked-up
             IP/MAC pair becomes the sender.  The ethernet destination is
             copied from the old source before the source is overwritten. */
1935           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1936           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1937           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
1938           memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
1939           memcpy (eth0->dst_address, eth0->src_address, 6);
1940           memcpy (eth0->src_address, macp0, 6);
1941           n_replies_sent += 1;
1942
1943           // For BVI, need to use l2-fwd node to send ARP reply as 
1944           // l2-output node cannot output packet to BVI properly
1945           cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0);
1946           if (PREDICT_FALSE (cfg0->bvi))
1947             {
              /* NOTE(review): the RX interface is overwritten with 0
                 before the reply re-enters the feature chain; the reason
                 is not visible in this function -- confirm. */
1948               vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
1949               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
1950               goto next_l2_feature;
1951             }
1952
1953           // Send ARP reply back out input interface through l2-output
1954           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1955           next0 = ARP_TERM_NEXT_L2_OUTPUT;
1956           // Note that output to VXLAN tunnel will fail due to SHG which
1957           // is probably desirable since ARP termination is not intended
1958           // for ARP requests from other hosts. If output to VXLAN tunnel is
1959           // required, however, can just clear the SHG in packet as follows:
1960           //   vnet_buffer(p0)->l2.shg = 0;
1961
1962           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1963                                            n_left_to_next,pi0,next0);
1964           continue;
1965
          /* Hand the buffer to the next enabled L2 input feature after
             clearing this node's own feature bit. */
1966         next_l2_feature:
1967           {
1968             u32 feature_bitmap0 =
1969                 vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
1970             vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0;
1971             next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index,
1972                                                     feature_bitmap0);
1973             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1974                                              n_left_to_next,pi0,next0);
1975             continue;
1976           }
1977
          /* A zero sender IP, or sender IP equal to target IP, replaces
             whatever error brought the packet here with gratuitous_arp. */
1978         drop:
1979           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1980               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1981                arp0->ip4_over_ethernet[1].ip4.as_u32))
1982             {
1983               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1984             }
1985           next0 = ARP_TERM_NEXT_DROP;
1986           p0->error = node->errors[error0];
1987
1988           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1989                                            n_left_to_next,pi0,next0);
1990         }
1991
1992       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1993     }
1994
  /* Account for all replies generated in this frame with a single
     counter update. */
1995   vlib_error_count (vm, node->node_index,
1996                     ETHERNET_ARP_ERROR_replies_sent, 
1997                     n_replies_sent);
1998   return frame->n_vectors;
1999 }
2000
2001 VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = {
2002   .function = arp_term_l2bd,
2003   .name = "arp-term-l2bd",
2004   .vector_size = sizeof (u32),
2005
2006   .n_errors = ETHERNET_ARP_N_ERROR,
2007   .error_strings = ethernet_arp_error_strings,
2008
2009   .n_next_nodes = ARP_TERM_N_NEXT,
2010   .next_nodes = {
2011     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2012     [ARP_TERM_NEXT_DROP] = "error-drop",
2013   },
2014
2015   .format_buffer = format_ethernet_arp_header,
2016   .format_trace = format_ethernet_arp_input_trace,
2017 };
2018
2019 clib_error_t *arp_term_init (vlib_main_t *vm)
2020 { // Initialize the feature next-node indexes 
2021   feat_bitmap_init_next_nodes(vm,
2022                               arp_term_l2bd_node.index,
2023                               L2INPUT_N_FEAT,
2024                               l2input_get_feat_names(),
2025                               arp_term_next_node_index);
2026   return 0;
2027 }
2028
/* Register arp_term_init as a vlib init function
   (presumably so it runs once during startup -- confirm). */
2029 VLIB_INIT_FUNCTION (arp_term_init);