Don't re-write adjacency with same information.
[vpp.git] / vnet / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ethernet/ethernet.h>
20 #include <vnet/ethernet/arp_packet.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vppinfra/mhash.h>
23
/* Defined outside this file.  Used below to pass a callback plus a copy of
   its argument block to the API layer; presumably the callback is then run
   on the main thread (the callback in this file asserts cpu number 0). */
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
25
/* Lookup key for the ARP cache: entries are found through
   ethernet_arp_main_t.ip4_entry_by_key using all three fields. */
typedef struct {
  /* Software interface the address was learned / configured on. */
  u32 sw_if_index;
  /* FIB index the address belongs to. */
  u32 fib_index;
  /* The IPv4 address being resolved. */
  ip4_address_t ip4_address;
} ethernet_arp_ip4_key_t;
31
32 typedef struct {
33   ethernet_arp_ip4_key_t key;
34   u8 ethernet_address[6];
35
36   u16 flags;
37 #define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0)
38 #define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN  (2 << 0)
39
40   u64 cpu_time_last_updated;
41
42   u32 * adjacencies;
43 } ethernet_arp_ip4_entry_t;
44
/* One proxy-ARP configuration record (elements of
   ethernet_arp_main_t.proxy_arps). */
typedef struct {
  /* NOTE(review): presumably the low / high bounds of the proxied
     address range; the code using them is outside this view. */
  u32 lo_addr;
  u32 hi_addr;
  /* FIB index the record applies to. */
  u32 fib_index;
} ethernet_proxy_arp_t;
50
/* A client waiting for an ARP resolution or a MAC change notification.
   Records for the same address form a singly linked list of pool indices. */
typedef struct {
  /* Pool index of the next record for the same address, ~0 ends the list. */
  u32 next_index;
  /* Process node to signal, and the event type / data to signal it with
     (arguments of vlib_process_signal_event). */
  uword node_index;
  uword type_opaque;
  uword data;
  /* Used for arp event notification only */
  void * data_callback;
  /* Client identifier, compared when a change-event registration is removed. */
  u32 pid;
} pending_resolution_t;
60
/* All ARP state kept by this file; a single instance exists
   (ethernet_arp_main). */
typedef struct {
  /* Hash tables mapping name to opcode. */
  uword * opcode_by_name;

  /* lite beer "glean" adjacency handling */
  /* Hash: ip4 address (as u32) -> index of the first waiter in
     pending_resolutions. */
  uword * pending_resolutions_by_address;
  /* Pool of one-shot resolution waiters. */
  pending_resolution_t * pending_resolutions;

  /* Mac address change notification */
  /* Hash: ip4 address (as u32) -> index of the first subscriber in
     mac_changes. */
  uword * mac_changes_by_address;
  /* Pool of change-event subscribers. */
  pending_resolution_t * mac_changes;

  /* Pool of ARP cache entries. */
  ethernet_arp_ip4_entry_t * ip4_entry_pool;

  /* Hash: ethernet_arp_ip4_key_t -> index into ip4_entry_pool. */
  mhash_t ip4_entry_by_key;

  /* ARP attack mitigation */
  /* Pool index used as the starting point when picking an entry to evict. */
  u32 arp_delete_rotor;
  /* NOTE(review): presumably the maximum number of cache entries; the
     code enforcing it is outside this view. */
  u32 limit_arp_cache_size;

  /* Proxy arp vector */
  ethernet_proxy_arp_t * proxy_arps;
} ethernet_arp_main_t;
84
/* The single, file-private instance of the ARP state. */
static ethernet_arp_main_t ethernet_arp_main;
86
87 static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va)
88 {
89   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
90   char * t = 0;
91   switch (h)
92     {
93 #define _(n,f) case n: t = #f; break;
94       foreach_ethernet_arp_hardware_type;
95 #undef _
96
97     default:
98       return format (s, "unknown 0x%x", h);
99     }
100
101   return format (s, "%s", t);
102 }
103
104 static u8 * format_ethernet_arp_opcode (u8 * s, va_list * va)
105 {
106   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
107   char * t = 0;
108   switch (o)
109     {
110 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
111       foreach_ethernet_arp_opcode;
112 #undef _
113
114     default:
115       return format (s, "unknown 0x%x", o);
116     }
117
118   return format (s, "%s", t);
119 }
120
121 static uword
122 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
123                                               va_list * args)
124 {
125   int * result = va_arg (*args, int *);
126   ethernet_arp_main_t * am = &ethernet_arp_main;
127   int x, i;
128
129   /* Numeric opcode. */
130   if (unformat (input, "0x%x", &x)
131       || unformat (input, "%d", &x))
132     {
133       if (x >= (1 << 16))
134         return 0;
135       *result = x;
136       return 1;
137     }
138
139   /* Named type. */
140   if (unformat_user (input, unformat_vlib_number_by_name,
141                      am->opcode_by_name, &i))
142     {
143       *result = i;
144       return 1;
145     }
146
147   return 0;
148 }
149
150 static uword
151 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
152                                              va_list * args)
153 {
154   int * result = va_arg (*args, int *);
155   if (! unformat_user (input, unformat_ethernet_arp_opcode_host_byte_order, result))
156     return 0;
157
158   *result = clib_host_to_net_u16 ((u16) *result);
159   return 1;
160 }
161
162 static u8 * format_ethernet_arp_header (u8 * s, va_list * va)
163 {
164   ethernet_arp_header_t * a = va_arg (*va, ethernet_arp_header_t *);
165   u32 max_header_bytes = va_arg (*va, u32);
166   uword indent;
167   u16 l2_type, l3_type;
168
169   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
170     return format (s, "ARP header truncated");
171
172   l2_type = clib_net_to_host_u16 (a->l2_type);
173   l3_type = clib_net_to_host_u16 (a->l3_type);
174
175   indent = format_get_indent (s);
176
177   s = format (s, "%U, type %U/%U, address size %d/%d",
178               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
179               format_ethernet_arp_hardware_type, l2_type,
180               format_ethernet_type, l3_type,
181               a->n_l2_address_bytes, a->n_l3_address_bytes);
182               
183   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
184       && l3_type == ETHERNET_TYPE_IP4)
185     {
186       s = format (s, "\n%U%U/%U -> %U/%U",
187                   format_white_space, indent,
188                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
189                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
190                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
191                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
192     }
193   else
194     {
195       uword n2 = a->n_l2_address_bytes;
196       uword n3 = a->n_l3_address_bytes;
197       s = format (s, "\n%U%U/%U -> %U/%U",
198                   format_white_space, indent,
199                   format_hex_bytes, a->data + 0*n2 + 0*n3, n2,
200                   format_hex_bytes, a->data + 1*n2 + 0*n3, n3,
201                   format_hex_bytes, a->data + 1*n2 + 1*n3, n2,
202                   format_hex_bytes, a->data + 2*n2 + 1*n3, n3);
203     }
204
205   return s;
206 }
207
208 static u8 * format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
209 {
210   vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
211   ethernet_arp_ip4_entry_t * e = va_arg (*va, ethernet_arp_ip4_entry_t *);
212   vnet_sw_interface_t * si;
213   ip4_fib_t * fib;
214   u8 * flags = 0;
215
216   if (! e)
217     return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4",
218                    "Flags", "Ethernet", "Interface");
219
220   fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index,
221                                            IP4_ROUTE_FLAG_FIB_INDEX);
222   si = vnet_get_sw_interface (vnm, e->key.sw_if_index);
223
224   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN)
225     flags = format(flags, "G");
226
227   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
228     flags = format(flags, "S");
229
230   s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U",
231               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
232               fib->table_id,
233               format_ip4_address, &e->key.ip4_address,
234               flags ? (char *) flags : "",
235               format_ethernet_address, e->ethernet_address,
236               format_vnet_sw_interface_name, vnm, si);
237
238   vec_free(flags);
239   return s;
240 }
241
/* Per-packet trace record for the ARP input node: the leading bytes of
   the packet, later printed as an ARP header. */
typedef struct {
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
245
246 static u8 * format_ethernet_arp_input_trace (u8 * s, va_list * va)
247 {
248   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
249   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
250   ethernet_arp_input_trace_t * t = va_arg (*va, ethernet_arp_input_trace_t *);
251
252   s = format (s, "%U",
253               format_ethernet_arp_header,
254               t->packet_data, sizeof (t->packet_data));
255
256   return s;
257 }
258
259 clib_error_t *
260 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
261                                    u32 sw_if_index,
262                                    u32 flags)
263 {
264   ethernet_arp_main_t * am = &ethernet_arp_main;
265   ethernet_arp_ip4_entry_t * e;
266
267   if (! (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
268     {
269       u32 i, * to_delete = 0;
270
271       pool_foreach (e, am->ip4_entry_pool, ({
272         if (e->key.sw_if_index == sw_if_index)
273           vec_add1 (to_delete, e - am->ip4_entry_pool);
274       }));
275
276       for (i = 0; i < vec_len (to_delete); i++)
277         {
278           ethernet_arp_ip4_over_ethernet_address_t delme;
279           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
280
281           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
282           delme.ip4.as_u32 = e->key.ip4_address.as_u32;
283
284           vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
285                                             e->key.fib_index, &delme);
286         }
287
288       vec_free (to_delete);
289     }
290
291   return 0;
292 }
293
294 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
295
/* Forward declarations of the workers that actually modify the ARP
   cache; they are called from set_ip4_over_ethernet_rpc_callback. */
static int
vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                         u32 sw_if_index,
                                         u32 fib_index,
                                         void * a_arg,
                                         int is_static);

static int
vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                           u32 sw_if_index,
                                           u32 fib_index,
                                           void * a_arg);
308
/* Argument block handed to set_ip4_over_ethernet_rpc_callback through
   vl_api_rpc_call_main_thread. */
typedef struct {
  /* Interface and FIB of the entry to add or remove. */
  u32 sw_if_index;
  u32 fib_index;
  /* IPv4 address / MAC address pair. */
  ethernet_arp_ip4_over_ethernet_address_t a;
  /* Non-zero to install the entry as static (ignored for removal). */
  int is_static;
  int is_remove; /* set is_remove=1 to clear arp entry */
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
316
317 static void set_ip4_over_ethernet_rpc_callback 
318 ( vnet_arp_set_ip4_over_ethernet_rpc_args_t * a)
319 {
320   vnet_main_t * vm = vnet_get_main();
321   ASSERT(os_get_cpu_number() == 0);
322
323   if (a->is_remove)
324     vnet_arp_unset_ip4_over_ethernet_internal(vm, 
325                                               a->sw_if_index, 
326                                               a->fib_index,
327                                               &(a->a));
328   else
329     vnet_arp_set_ip4_over_ethernet_internal (vm,
330                                              a->sw_if_index,
331                                              a->fib_index,
332                                              &(a->a),
333                                              a->is_static);
334 }
335
336 int
337 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
338                                 u32 sw_if_index,
339                                 u32 fib_index,
340                                 void * a_arg,
341                                 int is_static)
342 {
343   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
344   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
345
346   args.sw_if_index = sw_if_index;
347   args.fib_index = fib_index;
348   args.is_static = is_static;
349   args.is_remove = 0;
350   clib_memcpy (&args.a, a, sizeof (*a));
351
352   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
353                                (u8 *) &args, sizeof (args));
354   return 0;
355 }
356
357 int
358 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
359                                          u32 sw_if_index,
360                                          u32 fib_index,
361                                          void * a_arg,
362                                          int is_static)
363 {
364   ethernet_arp_ip4_key_t k;
365   ethernet_arp_ip4_entry_t * e = 0;
366   ethernet_arp_main_t * am = &ethernet_arp_main;
367   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
368   vlib_main_t * vm = vlib_get_main();
369   ip4_main_t * im = &ip4_main;
370   ip_lookup_main_t * lm = &im->lookup_main;
371   int make_new_arp_cache_entry=1;
372   uword * p;
373   ip4_add_del_route_args_t args;
374   ip_adjacency_t adj, * existing_adj;
375   pending_resolution_t * pr, * mc;
376   
377   u32 next_index;
378   u32 adj_index;
379
380   fib_index = (fib_index != (u32)~0) 
381     ? fib_index : im->fib_index_by_sw_if_index[sw_if_index];
382
383   k.sw_if_index = sw_if_index;
384   k.ip4_address = a->ip4;
385   k.fib_index = fib_index;
386
387   p = mhash_get (&am->ip4_entry_by_key, &k);
388   if (p)
389     {
390       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
391
392       /* Refuse to over-write static arp. */
393       if (!is_static &&
394           (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
395         return -2;
396       make_new_arp_cache_entry = 0;
397     }
398
399   /* Note: always install the route. It might have been deleted */
400   memset(&adj, 0, sizeof(adj));
401   adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
402   adj.n_adj = 1; /*  otherwise signature compare fails */
403
404   vnet_rewrite_for_sw_interface
405     (vnm,
406      VNET_L3_PACKET_TYPE_IP4,
407      sw_if_index,
408      ip4_rewrite_node.index,
409      a->ethernet,               /* destination address */
410      &adj.rewrite_header,
411      sizeof (adj.rewrite_data));
412
413   /* result of this lookup should be next-hop adjacency */
414   adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0);
415   existing_adj = ip_get_adjacency(lm, adj_index);
416
417   if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
418       existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
419     {
420       u32 * ai;
421       u32 * adjs = vec_dup(e->adjacencies);
422       /* Update all adj assigned to this arp entry */
423       vec_foreach(ai, adjs)
424         {
425           int i;
426           ip_adjacency_t * uadj = ip_get_adjacency(lm, *ai);
427           for (i = 0; i < uadj->n_adj; i++)
428             if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP &&
429                 uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32)
430               ip_update_adjacency (lm, *ai + i, &adj);
431         }
432       vec_free(adjs);
433     }
434   else
435     {
436       /* Check that new adjacency actually isn't exactly the same as
437        *  what is already there. If we over-write the adjacency with
438        *  exactly the same info, its technically a new adjacency with
439        *  new counters, but to user it appears as counters reset.
440        */
441       if (vnet_ip_adjacency_share_compare (&adj, existing_adj) == 0) {
442         /* create new adj */
443         args.table_index_or_table_id = fib_index;
444         args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR;
445         args.dst_address = a->ip4;
446         args.dst_address_length = 32;
447         args.adj_index = ~0;
448         args.add_adj = &adj;
449         args.n_add_adj = 1;
450         ip4_add_del_route (im, &args);
451       }
452     }
453
454   if (make_new_arp_cache_entry)
455     {
456       pool_get (am->ip4_entry_pool, e);
457       mhash_set (&am->ip4_entry_by_key, &k,
458                  e - am->ip4_entry_pool,
459                  /* old value */ 0);
460       e->key = k;
461     }
462
463   /* Update time stamp and ethernet address. */
464   clib_memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address));
465   e->cpu_time_last_updated = clib_cpu_time_now ();
466   if (is_static)
467     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
468
469   /* Customer(s) waiting for this address to be resolved? */
470   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
471   if (p)
472     {
473       next_index = p[0];
474
475       while (next_index != (u32)~0)
476         {
477           pr = pool_elt_at_index (am->pending_resolutions, next_index);
478           vlib_process_signal_event (vm, pr->node_index,
479                                      pr->type_opaque, 
480                                      pr->data);
481           next_index = pr->next_index;
482           pool_put (am->pending_resolutions, pr);
483         }
484       
485       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
486     }
487
488   /* Customer(s) requesting ARP event for this address? */
489   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
490   if (p)
491     {
492       next_index = p[0];
493
494       while (next_index != (u32)~0)
495         {
496           int (*fp)(u32, u8 *, u32, u32);
497           int rv = 1;
498           mc = pool_elt_at_index (am->mac_changes, next_index);
499           fp = mc->data_callback;
500
501           /* Call the user's data callback, return 1 to suppress dup events */
502           if (fp)
503             rv = (*fp)(mc->data, a->ethernet, sw_if_index, 0);
504           
505           /* 
506            * Signal the resolver process, as long as the user
507            * says they want to be notified
508            */
509           if (rv == 0)
510             vlib_process_signal_event (vm, mc->node_index,
511                                        mc->type_opaque, 
512                                        mc->data);
513           next_index = mc->next_index;
514         }
515     }
516
517   return 0;
518 }
519
520 void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, 
521                                              void * address_arg,
522                                              uword node_index,
523                                              uword type_opaque,
524                                              uword data)
525 {
526   ethernet_arp_main_t * am = &ethernet_arp_main;
527   ip4_address_t * address = address_arg;
528   uword * p;
529   pending_resolution_t * pr;
530   
531   pool_get (am->pending_resolutions, pr);
532
533   pr->next_index = ~0;
534   pr->node_index = node_index;
535   pr->type_opaque = type_opaque;
536   pr->data = data;
537   pr->data_callback = 0;
538
539   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
540   if (p)
541     {
542       /* Insert new resolution at the head of the list */
543       pr->next_index = p[0];
544       hash_unset (am->pending_resolutions_by_address, address->as_u32);
545     }
546   
547   hash_set (am->pending_resolutions_by_address, address->as_u32, 
548             pr - am->pending_resolutions);
549 }
550
551 int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, 
552                                        void * data_callback,
553                                        u32 pid,
554                                        void * address_arg,
555                                        uword node_index,
556                                        uword type_opaque,
557                                        uword data, int is_add)
558 {
559   ethernet_arp_main_t * am = &ethernet_arp_main;
560   ip4_address_t * address = address_arg;
561   uword * p;
562   pending_resolution_t * mc;
563   void (*fp)(u32, u8 *) = data_callback;
564   
565   if (is_add)
566     {
567       pool_get (am->mac_changes, mc);
568
569       mc->next_index = ~0;
570       mc->node_index = node_index;
571       mc->type_opaque = type_opaque;
572       mc->data = data;
573       mc->data_callback = data_callback;
574       mc->pid = pid;
575       
576       p = hash_get (am->mac_changes_by_address, address->as_u32);
577       if (p)
578         {
579           /* Insert new resolution at the head of the list */
580           mc->next_index = p[0];
581           hash_unset (am->mac_changes_by_address, address->as_u32);
582         }
583       
584       hash_set (am->mac_changes_by_address, address->as_u32, 
585                 mc - am->mac_changes);
586       return 0;
587     }
588   else
589     {
590       u32 index;
591       pending_resolution_t * mc_last = 0;
592
593       p = hash_get (am->mac_changes_by_address, address->as_u32);
594       if (p == 0)
595         return VNET_API_ERROR_NO_SUCH_ENTRY;
596
597       index = p[0];
598
599       while (index != (u32)~0)
600         {
601           mc = pool_elt_at_index (am->mac_changes, index);
602           if (mc->node_index == node_index &&
603               mc->type_opaque == type_opaque &&
604               mc->pid == pid)
605             {
606               /* Clients may need to clean up pool entries, too */
607               if (fp)
608                 (*fp)(mc->data, 0 /* no new mac addrs */);
609               if (index == p[0])
610                 {
611                   hash_unset (am->mac_changes_by_address, address->as_u32);
612                   if (mc->next_index != ~0)
613                     hash_set (am->mac_changes_by_address, address->as_u32,
614                               mc->next_index);
615                   pool_put (am->mac_changes, mc);
616                   return 0;
617                 }
618               else
619                 {
620                   ASSERT(mc_last);
621                   mc_last->next_index = mc->next_index;
622                   pool_put (am->mac_changes, mc);
623                   return 0;
624                 }
625             }
626           mc_last = mc;
627           index = mc->next_index;
628         }
629       
630       return VNET_API_ERROR_NO_SUCH_ENTRY;
631     }
632 }
633
/* Either we drop the packet or we send a reply to the sender.
   ARP_INPUT_N_NEXT is the number of next-node choices, not a choice. */
typedef enum {
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_NEXT_REPLY_TX,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
640
/* X-macro list of ARP input counters: _ (symbol, description).
   Expanded below to build ethernet_arp_input_error_t; the first entry
   therefore gets the value 0. */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
655
/* One ETHERNET_ARP_ERROR_<sym> per entry of foreach_ethernet_arp_error,
   in list order; ETHERNET_ARP_N_ERROR is the number of entries. */
typedef enum {
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
  ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
662
663 /* get first interface address */
664 ip4_address_t *
665 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
666                              ip_interface_address_t ** result_ia)
667 {
668   ip_lookup_main_t * lm = &im->lookup_main;
669   ip_interface_address_t * ia = 0;
670   ip4_address_t * result = 0;
671
672   foreach_ip_interface_address (lm, ia, sw_if_index, 
673                                 1 /* honor unnumbered */,
674   ({
675     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
676     result = a;
677     break;
678   }));
679   if (result_ia)
680     *result_ia = result ? ia : 0;
681   return result;
682 }
683
684 static void unset_random_arp_entry (void)
685 {
686   ethernet_arp_main_t * am = &ethernet_arp_main;
687   ethernet_arp_ip4_entry_t * e;
688   vnet_main_t * vnm = vnet_get_main();
689   ethernet_arp_ip4_over_ethernet_address_t delme;  
690   u32 index;
691
692   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
693   am->arp_delete_rotor = index;
694
695   /* Try again from elt 0, could happen if an intfc goes down */
696   if (index == ~0)
697     {
698       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
699       am->arp_delete_rotor = index;
700     }
701
702   /* Nothing left in the pool */
703   if (index == ~0)
704     return;
705
706   e = pool_elt_at_index (am->ip4_entry_pool, index);
707   
708   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
709   delme.ip4.as_u32 = e->key.ip4_address.as_u32;
710   
711   vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index,
712                                     e->key.fib_index, &delme);
713 }
714   
/* Replicate a (partially built) ARP packet to every interface that is
   unnumbered with respect to the interface owning the matched address.

   p0 / pi0  the packet and its buffer index.
   eth0      ethernet header of the packet; only its destination MAC is
             read before the pointer is reused for the rebuilt headers.
   ifa0      interface address record that matched; its sw_if_index
             identifies the interface supplying the address.

   For each such interface the ARP sender MAC and a fresh L2 header
   (with VLAN tags if the sub-interface has them) are written.  Copies
   are sent directly to the hardware interface output node; the original
   packet only gets its TX interface set and is left to the caller. */
static void arp_unnumbered (vlib_buffer_t * p0, 
                       u32 pi0,
                       ethernet_header_t * eth0,
                       ip_interface_address_t * ifa0)
{
  vlib_main_t * vm = vlib_get_main();
  vnet_main_t * vnm = vnet_get_main();
  vnet_interface_main_t * vim = &vnm->interface_main;
  vnet_sw_interface_t * si;
  vnet_hw_interface_t * hi;
  u32 unnum_src_sw_if_index;
  u32 * broadcast_swifs = 0;
  u32 * buffers = 0;
  u32 n_alloc = 0;
  vlib_buffer_t * b0;
  int i;
  u8 dst_mac_address[6];
  i16 header_size;
  ethernet_arp_header_t * arp0;

  /* Save the dst mac address */
  clib_memcpy(dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));

  /* Figure out which sw_if_index supplied the address */
  unnum_src_sw_if_index = ifa0->sw_if_index;

  /* Track down all users of the unnumbered source */
  pool_foreach (si, vim->sw_interfaces, 
  ({
    if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
        (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
      {
        vec_add1 (broadcast_swifs, si->sw_if_index);
      }
  }));
          

  /* The code below indexes broadcast_swifs[0] unconditionally. */
  ASSERT (vec_len(broadcast_swifs));

  /* Allocate buffering if we need it */
  if (vec_len(broadcast_swifs) > 1)
    {
      /* One extra buffer per interface beyond the first. */
      vec_validate (buffers, vec_len(broadcast_swifs)-2);
      n_alloc = vlib_buffer_alloc (vm, buffers, vec_len(buffers));
      /* Keep only the buffers that were actually allocated. */
      _vec_len (buffers) = n_alloc;
      for (i = 0; i < n_alloc; i++)
        {
          b0 = vlib_get_buffer (vm, buffers[i]);

          /* xerox (partially built) ARP pkt */
          clib_memcpy (b0->data, p0->data, p0->current_length + p0->current_data);
          b0->current_data = p0->current_data;
          b0->current_length = p0->current_length;
          vnet_buffer(b0)->sw_if_index[VLIB_RX] =
            vnet_buffer(p0)->sw_if_index[VLIB_RX];
        }
    }

  /* Put the original packet in slot 0, ahead of the copies. */
  vec_insert (buffers, 1, 0);
  buffers[0] = pi0;
  
  /* buffers[i] goes out on broadcast_swifs[i]. */
  for (i = 0; i < vec_len(buffers); i++)
    {
      b0 = vlib_get_buffer(vm, buffers[i]);
      arp0 = vlib_buffer_get_current (b0);

      hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
      si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);

      /* For decoration, most likely */
      vnet_buffer(b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;

      /* Fix ARP pkt src address */
      clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);

      /* Build L2 encaps for this swif */
      header_size = sizeof (ethernet_header_t);
      if (si->sub.eth.flags.one_tag) 
        header_size += 4;
      else if (si->sub.eth.flags.two_tags)
        header_size += 8;
      
      /* Back up so the buffer starts at the new ethernet header. */
      vlib_buffer_advance (b0, -header_size);
      eth0 = vlib_buffer_get_current (b0);

      if (si->sub.eth.flags.one_tag) {
        ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
        
        eth0->type = si->sub.eth.flags.dot1ad ?
          clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
          clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        outer->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
        outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
        
      } else if (si->sub.eth.flags.two_tags) {
        ethernet_vlan_header_t * outer = (void *) (eth0 + 1);
        ethernet_vlan_header_t * inner = (void *) (outer + 1);
        
        eth0->type = si->sub.eth.flags.dot1ad ?
          clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
          clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        outer->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
        outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
        inner->priority_cfi_and_id = 
          clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
        inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
        
      } else {
        eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
      }
      
      /* Restore the original dst address, set src address */
      clib_memcpy (eth0->dst_address, dst_mac_address, sizeof (eth0->dst_address));
      clib_memcpy (eth0->src_address, hi->hw_address, sizeof (eth0->src_address));
      
      /* Transmit replicas */
      if (i > 0)
        {
          /* Each copy is sent in its own single-packet frame. */
          vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
          u32 * to_next = vlib_frame_vector_args (f);
          to_next[0] = buffers[i];
          f->n_vectors = 1;
          vlib_put_frame_to_node (vm, hi->output_node_index, f);
        }
    }

  /* The regular path outputs the original pkt.. */
  vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];

  vec_free (broadcast_swifs);
  vec_free (buffers);
}
849
850 static uword
851 arp_input (vlib_main_t * vm,
852            vlib_node_runtime_t * node,
853            vlib_frame_t * frame)
854 {
855   ethernet_arp_main_t * am = &ethernet_arp_main;
856   vnet_main_t * vnm = vnet_get_main();
857   ip4_main_t * im4 = &ip4_main;
858   u32 n_left_from, next_index, * from, * to_next;
859   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
860
861   from = vlib_frame_vector_args (frame);
862   n_left_from = frame->n_vectors;
863   next_index = node->cached_next_index;
864
865   if (node->flags & VLIB_NODE_FLAG_TRACE)
866     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
867                                    /* stride */ 1,
868                                    sizeof (ethernet_arp_input_trace_t));
869
870   while (n_left_from > 0)
871     {
872       u32 n_left_to_next;
873
874       vlib_get_next_frame (vm, node, next_index,
875                            to_next, n_left_to_next);
876
877       while (n_left_from > 0 && n_left_to_next > 0)
878         {
879           vlib_buffer_t * p0;
880           vnet_hw_interface_t * hw_if0;
881           ethernet_arp_header_t * arp0;
882           ethernet_header_t * eth0;
883           ip_interface_address_t * ifa0;
884           ip_adjacency_t * adj0;
885           ip4_address_t * if_addr0;
886           ip4_address_t proxy_src;
887           u32 pi0, error0, next0, sw_if_index0;
888           u8 is_request0, src_is_local0, dst_is_local0, is_unnum0;
889           ethernet_proxy_arp_t * pa;
890
891           pi0 = from[0];
892           to_next[0] = pi0;
893           from += 1;
894           to_next += 1;
895           n_left_from -= 1;
896           n_left_to_next -= 1;
897
898           p0 = vlib_get_buffer (vm, pi0);
899           arp0 = vlib_buffer_get_current (p0);
900
901           is_request0 = arp0->opcode 
902               == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
903
904           error0 = ETHERNET_ARP_ERROR_replies_sent;
905
906           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
907                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
908                     : error0);
909           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
910                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
911                     : error0);
912
913           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
914
915           if (error0)
916             goto drop1;
917
918           /* Check that IP address is local and matches incoming interface. */
919           if_addr0 = ip4_interface_address_matching_destination (im4,
920                                                                  &arp0->ip4_over_ethernet[1].ip4,
921                                                                  sw_if_index0,
922                                                                  &ifa0);
923           if (! if_addr0)
924             {
925               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
926               goto drop1;
927             }
928
929           /* Honor unnumbered interface, if any */
930           is_unnum0 = sw_if_index0 != ifa0->sw_if_index;
931
932           /* Source must also be local to subnet of matching interface address. */
933           if (! ip4_destination_matches_interface (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0))
934             {
935               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
936               goto drop1;
937             }
938
939           /* Reject requests/replies with our local interface address. */
940           src_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32;
941           if (src_is_local0)
942             {
943               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
944               goto drop1;
945             }
946
947           dst_is_local0 = if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32;
948
949           /* Fill in ethernet header. */
950           eth0 = ethernet_buffer_get_header (p0);
951
952           /* Trash ARP packets whose ARP-level source addresses do not
953              match their L2-frame-level source addresses */
954           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
955                       sizeof (eth0->src_address)))
956             {
957               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
958               goto drop2;
959             }
960
961           /* Learn or update sender's mapping only for requests or unicasts
962              that don't match local interface address. */
963           if (ethernet_address_cast (eth0->dst_address) == ETHERNET_ADDRESS_UNICAST
964               || is_request0)
965             {
966               if (am->limit_arp_cache_size && 
967                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
968                 unset_random_arp_entry();
969
970               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, 
971                                               (u32)~0 /* default fib */,
972                                               &arp0->ip4_over_ethernet[0], 
973                                               0 /* is_static */);
974               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
975              }
976
977           /* Only send a reply for requests sent which match a local interface. */
978           if (! (is_request0 && dst_is_local0))
979             {
980               error0 = (arp0->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)
981                         ? ETHERNET_ARP_ERROR_replies_received : error0);
982               goto drop1;
983             }
984
985           /* Send a reply. */
986         send_reply:
987           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
988           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
989
990           /* Send reply back through input interface */
991           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
992           next0 = ARP_INPUT_NEXT_REPLY_TX;
993
994           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
995
996           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
997
998           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, 6);
999           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = if_addr0->data_u32;
1000
1001           /* Hardware must be ethernet-like. */
1002           ASSERT (vec_len (hw_if0->hw_address) == 6);
1003
1004           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1005           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
1006
1007           /* Figure out how much to rewind current data from adjacency. */
1008           if (ifa0)
1009             {
1010               adj0 = ip_get_adjacency (&ip4_main.lookup_main, 
1011                                        ifa0->neighbor_probe_adj_index);
1012               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1013                 {
1014                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1015                   goto drop2;
1016                 }
1017               if (is_unnum0)
1018                 arp_unnumbered (p0, pi0, eth0, ifa0);
1019               else
1020                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1021             }
1022
1023           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1024                                            n_left_to_next,pi0,next0);
1025
1026           n_replies_sent += 1;
1027           continue;
1028
1029         drop1:
1030           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1031               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1032                arp0->ip4_over_ethernet[1].ip4.as_u32))
1033             {
1034               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1035               goto drop2;
1036             }
1037           /* See if proxy arp is configured for the address */
1038           if (is_request0) 
1039             {
1040               vnet_sw_interface_t * si;
1041               u32 this_addr = clib_net_to_host_u32 
1042                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1043               u32 fib_index0;
1044
1045               si = vnet_get_sw_interface (vnm, sw_if_index0);
1046               
1047               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1048                 goto drop2;
1049
1050               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index, 
1051                                     sw_if_index0);
1052
1053               vec_foreach (pa, am->proxy_arps)
1054                 {
1055                   u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1056                   u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1057
1058                    /* an ARP request hit in the proxy-arp table? */
1059                    if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1060                        (fib_index0 == pa->fib_index))
1061                     {
1062                       eth0 = ethernet_buffer_get_header (p0);
1063                       proxy_src.as_u32 = 
1064                         arp0->ip4_over_ethernet[1].ip4.data_u32;
1065
1066                       /* 
1067                        * Rewind buffer, direct code above not to
1068                        * think too hard about it. 
1069                        * $$$ is the answer ever anything other than
1070                        * vlib_buffer_reset(..)?
1071                        */
1072                       ifa0 = 0;
1073                       if_addr0 = &proxy_src;
1074                       vlib_buffer_reset (p0);
1075                       n_proxy_arp_replies_sent++;
1076                       goto send_reply;
1077                     }
1078                 }
1079             }
1080           
1081         drop2:
1082
1083           next0 = ARP_INPUT_NEXT_DROP;
1084           p0->error = node->errors[error0];
1085
1086           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1087                                            n_left_to_next,pi0,next0);
1088         }
1089
1090       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1091     }
1092
1093   vlib_error_count (vm, node->node_index,
1094                     ETHERNET_ARP_ERROR_replies_sent, 
1095                     n_replies_sent - n_proxy_arp_replies_sent);
1096   
1097   vlib_error_count (vm, node->node_index,
1098                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent, 
1099                     n_proxy_arp_replies_sent);
1100   return frame->n_vectors;
1101 }
1102
1103 static char * ethernet_arp_error_strings[] = {
1104 #define _(sym,string) string,
1105   foreach_ethernet_arp_error
1106 #undef _
1107 };
1108
1109 VLIB_REGISTER_NODE (arp_input_node,static) = {
1110   .function = arp_input,
1111   .name = "arp-input",
1112   .vector_size = sizeof (u32),
1113
1114   .n_errors = ETHERNET_ARP_N_ERROR,
1115   .error_strings = ethernet_arp_error_strings,
1116
1117   .n_next_nodes = ARP_INPUT_N_NEXT,
1118   .next_nodes = {
1119     [ARP_INPUT_NEXT_DROP] = "error-drop",
1120     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1121   },
1122
1123   .format_buffer = format_ethernet_arp_header,
1124   .format_trace = format_ethernet_arp_input_trace,
1125 };
1126
1127 static int
1128 ip4_arp_entry_sort (void *a1, void *a2)
1129 {
1130   ethernet_arp_ip4_entry_t * e1 = a1;
1131   ethernet_arp_ip4_entry_t * e2 = a2;
1132
1133   int cmp;
1134   vnet_main_t * vnm = vnet_get_main();
1135
1136   cmp = vnet_sw_interface_compare 
1137     (vnm, e1->key.sw_if_index, e2->key.sw_if_index);
1138   if (! cmp)
1139     cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address);
1140   return cmp;
1141 }
1142
1143 static clib_error_t *
1144 show_ip4_arp (vlib_main_t * vm,
1145               unformat_input_t * input,
1146               vlib_cli_command_t * cmd)
1147 {
1148   vnet_main_t * vnm = vnet_get_main();
1149   ethernet_arp_main_t * am = &ethernet_arp_main;
1150   ethernet_arp_ip4_entry_t * e, * es;
1151   ethernet_proxy_arp_t * pa;
1152   clib_error_t * error = 0;
1153   u32 sw_if_index;
1154
1155   /* Filter entries by interface if given. */
1156   sw_if_index = ~0;
1157   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1158
1159   es = 0;
1160   pool_foreach (e, am->ip4_entry_pool, ({ vec_add1 (es, e[0]); }));
1161   if ( es )
1162     {
1163       vec_sort_with_function (es, ip4_arp_entry_sort);
1164       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1165       vec_foreach (e, es) {
1166         if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index)
1167           continue;
1168         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1169       }
1170       vec_free (es);
1171     }
1172
1173   if (vec_len (am->proxy_arps))
1174     {
1175       vlib_cli_output (vm, "Proxy arps enabled for:");
1176       vec_foreach(pa, am->proxy_arps)
1177         {
1178           vlib_cli_output (vm, "Fib_index %d   %U - %U ", 
1179                            pa->fib_index,
1180                            format_ip4_address, &pa->lo_addr, 
1181                            format_ip4_address, &pa->hi_addr);
1182         }
1183     }
1184       
1185   return error;
1186 }
1187
1188 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1189   .path = "show ip arp",
1190   .function = show_ip4_arp,
1191   .short_help = "Show ARP table",
1192 };
1193
1194 typedef struct {
1195   pg_edit_t l2_type, l3_type;
1196   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1197   pg_edit_t opcode;
1198   struct {
1199     pg_edit_t ethernet;
1200     pg_edit_t ip4;
1201   } ip4_over_ethernet[2];
1202 } pg_ethernet_arp_header_t;
1203
1204 static inline void
1205 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1206 {
1207   /* Initialize fields that are not bit fields in the IP header. */
1208 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1209   _ (l2_type);
1210   _ (l3_type);
1211   _ (n_l2_address_bytes);
1212   _ (n_l3_address_bytes);
1213   _ (opcode);
1214   _ (ip4_over_ethernet[0].ethernet);
1215   _ (ip4_over_ethernet[0].ip4);
1216   _ (ip4_over_ethernet[1].ethernet);
1217   _ (ip4_over_ethernet[1].ip4);
1218 #undef _
1219 }
1220
1221 uword
1222 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1223 {
1224   pg_stream_t * s = va_arg (*args, pg_stream_t *);
1225   pg_ethernet_arp_header_t * p;
1226   u32 group_index;
1227   
1228   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1229                             &group_index);
1230   pg_ethernet_arp_header_init (p);
1231
1232   /* Defaults. */
1233   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1234   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1235   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1236   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1237
1238   if (! unformat (input, "%U: %U/%U -> %U/%U",
1239                   unformat_pg_edit,
1240                   unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1241                   unformat_pg_edit,
1242                   unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1243                   unformat_pg_edit,
1244                   unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1245                   unformat_pg_edit,
1246                   unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1247                   unformat_pg_edit,
1248                   unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1249     {
1250       /* Free up any edits we may have added. */
1251       pg_free_edit_group (s);
1252       return 0;
1253     }
1254   return 1;
1255 }
1256
1257 clib_error_t *ip4_set_arp_limit (u32 arp_limit)
1258 {
1259   ethernet_arp_main_t * am = &ethernet_arp_main;
1260
1261   am->limit_arp_cache_size = arp_limit;
1262   return 0;
1263 }
1264
1265 static void
1266 arp_ip4_entry_del_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1267 {
1268   int done = 0;
1269   int i;
1270
1271   while (!done)
1272     {
1273       vec_foreach_index(i, e->adjacencies)
1274         if (vec_elt(e->adjacencies, i) == adj_index)
1275           {
1276             vec_del1(e->adjacencies, i);
1277             continue;
1278           }
1279       done = 1;
1280     }
1281 }
1282
1283 static void
1284 arp_ip4_entry_add_adj(ethernet_arp_ip4_entry_t *e, u32 adj_index)
1285 {
1286   int i;
1287   vec_foreach_index(i, e->adjacencies)
1288     if (vec_elt(e->adjacencies, i) == adj_index)
1289       return;
1290   vec_add1(e->adjacencies, adj_index);
1291 }
1292
1293 static void
1294 arp_add_del_adj_cb (struct ip_lookup_main_t * lm,
1295                     u32 adj_index,
1296                     ip_adjacency_t * adj,
1297                     u32 is_del)
1298 {
1299   ethernet_arp_main_t * am = &ethernet_arp_main;
1300   ip4_main_t * im = &ip4_main;
1301   ethernet_arp_ip4_key_t k;
1302   ethernet_arp_ip4_entry_t * e = 0;
1303   uword * p;
1304   u32 ai;
1305
1306   for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++)
1307     {
1308       adj = ip_get_adjacency (lm, ai);
1309       if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && adj->arp.next_hop.ip4.as_u32)
1310         {
1311           k.sw_if_index = adj->rewrite_header.sw_if_index;
1312           k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32;
1313           k.fib_index = im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index];
1314           p = mhash_get (&am->ip4_entry_by_key, &k);
1315           if (p)
1316             e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1317         }
1318       else
1319         continue;
1320
1321       if (is_del)
1322         {
1323           if (!e)
1324             clib_warning("Adjacency contains unknown ARP next hop %U (del)",
1325                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1326           else
1327             arp_ip4_entry_del_adj(e, adj->heap_handle);
1328         }
1329       else /* add */
1330         {
1331           if (!e)
1332             clib_warning("Adjacency contains unknown ARP next hop %U (add)",
1333                          format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP4);
1334           else
1335             arp_ip4_entry_add_adj(e, adj->heap_handle);
1336         }
1337     }
1338 }
1339
1340 static clib_error_t * ethernet_arp_init (vlib_main_t * vm)
1341 {
1342   ethernet_arp_main_t * am = &ethernet_arp_main;
1343   pg_node_t * pn;
1344   clib_error_t * error;
1345   ip4_main_t * im = &ip4_main;
1346   ip_lookup_main_t * lm = &im->lookup_main;
1347
1348   if ((error = vlib_call_init_function (vm, ethernet_init)))
1349     return error;
1350
1351   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1352
1353   pn = pg_get_node (arp_input_node.index);
1354   pn->unformat_edit = unformat_pg_arp_header;
1355
1356   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1357 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1358   foreach_ethernet_arp_opcode;
1359 #undef _
1360
1361   mhash_init (&am->ip4_entry_by_key,
1362               /* value size */ sizeof (uword),
1363               /* key size */ sizeof (ethernet_arp_ip4_key_t));
1364
1365   /* $$$ configurable */
1366   am->limit_arp_cache_size = 50000;
1367
1368   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1369   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1370
1371   /* don't trace ARP error packets */
1372   {
1373     vlib_node_runtime_t *rt = 
1374       vlib_node_get_runtime (vm, arp_input_node.index);
1375
1376 #define _(a,b)                                  \
1377     vnet_pcap_drop_trace_filter_add_del         \
1378         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1379          1 /* is_add */);
1380     foreach_ethernet_arp_error
1381 #undef _
1382   }
1383
1384   ip_register_add_del_adjacency_callback(lm, arp_add_del_adj_cb);
1385
1386   return 0;
1387 }
1388
1389 VLIB_INIT_FUNCTION (ethernet_arp_init);
1390
1391 int 
1392 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1393                                   u32 sw_if_index, u32 fib_index,
1394                                   void * a_arg)
1395 {
1396   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1397   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1398
1399   args.sw_if_index = sw_if_index;
1400   args.fib_index = fib_index;
1401   args.is_remove = 1;
1402   clib_memcpy (&args.a, a, sizeof (*a));
1403
1404   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, 
1405                                (u8 *) &args, sizeof (args));
1406   return 0;
1407 }
1408
1409 static inline int 
1410 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1411                                            u32 sw_if_index, 
1412                                            u32 fib_index,
1413                                            void * a_arg)
1414 {
1415   ethernet_arp_ip4_entry_t * e;
1416   ethernet_arp_main_t * am = &ethernet_arp_main;
1417   ethernet_arp_ip4_over_ethernet_address_t * a = a_arg;
1418   ethernet_arp_ip4_key_t k;
1419   uword * p;
1420   ip4_add_del_route_args_t args;
1421   ip4_main_t * im = &ip4_main;
1422   ip_lookup_main_t * lm = &im->lookup_main;
1423   u32 adj_index;
1424   ip_adjacency_t * adj;
1425
1426   k.sw_if_index = sw_if_index;
1427   k.ip4_address = a->ip4;
1428   k.fib_index = fib_index;
1429   p = mhash_get (&am->ip4_entry_by_key, &k);
1430   if (! p)
1431     return -1;
1432
1433   memset(&args, 0, sizeof(args));
1434
1435   /* 
1436    * Make sure that the route actually exists before we try to delete it,
1437    * and make sure that it's a rewrite adjacency.
1438    *
1439    * If we point 1-N unnumbered interfaces at a loopback interface and 
1440    * shut down the loopback before shutting down 1-N unnumbered 
1441    * interfaces, the ARP cache will still have an entry, 
1442    * but the route will have disappeared.
1443    * 
1444    * See also ip4_del_interface_routes (...) 
1445    *            -> ip4_delete_matching_routes (...).
1446    */
1447   
1448   adj_index = ip4_fib_lookup_with_table 
1449       (im, fib_index, &a->ip4, 1 /* disable default route */);
1450
1451   /* Miss adj? Forget it... */
1452   if (adj_index != lm->miss_adj_index) {
1453       adj = ip_get_adjacency (lm, adj_index);
1454       /* 
1455        * Stupid control-plane trick:
1456        * admin down an interface (removes arp routes from fib),
1457        * bring the interface back up (does not reinstall them)
1458        * then remove the arp cache entry (yuck). When that happens,
1459        * the adj we find here will be the interface subnet ARP adj.
1460        */
1461       if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) {
1462           args.table_index_or_table_id = fib_index;
1463           args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL 
1464               | IP4_ROUTE_FLAG_NEIGHBOR;
1465           args.dst_address = a->ip4;
1466           args.dst_address_length = 32;
1467           ip4_add_del_route (im, &args);
1468           ip4_maybe_remap_adjacencies (im, fib_index, args.flags);
1469       }
1470   }
1471
1472   e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
1473   mhash_unset (&am->ip4_entry_by_key, &e->key, 0);
1474   pool_put (am->ip4_entry_pool, e);
1475   return 0;
1476 }
1477
1478 static void 
1479 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t *a)
1480 {
1481   u8 old;
1482   int i;
1483
1484   for (i = 3; i >= 0; i--) 
1485     {
1486       old = a->ip4.as_u8[i];
1487       a->ip4.as_u8[i] += 1;
1488       if (old < a->ip4.as_u8[i])
1489         break;
1490     }
1491
1492   for (i = 5; i >= 0; i--)
1493     {
1494       old = a->ethernet[i];
1495       a->ethernet[i] += 1;
1496       if (old < a->ethernet[i])
1497         break;
1498     }
1499 }
1500
1501 int vnet_proxy_arp_add_del (ip4_address_t *lo_addr,
1502                             ip4_address_t *hi_addr,
1503                             u32 fib_index, int is_del)
1504 {
1505   ethernet_arp_main_t *am = &ethernet_arp_main;
1506   ethernet_proxy_arp_t *pa;
1507   u32 found_at_index = ~0;
1508
1509   vec_foreach (pa, am->proxy_arps)
1510     {
1511       if (pa->lo_addr == lo_addr->as_u32 
1512           && pa->hi_addr == hi_addr->as_u32
1513           && pa->fib_index == fib_index)
1514         {
1515           found_at_index = pa - am->proxy_arps;
1516           break;
1517         }
1518     }
1519
1520   if (found_at_index != ~0)
1521     {
1522       /* Delete, otherwise it's already in the table */
1523       if (is_del)
1524         vec_delete (am->proxy_arps, 1, found_at_index);
1525       return 0;
1526     }
1527   /* delete, no such entry */
1528   if (is_del)
1529     return VNET_API_ERROR_NO_SUCH_ENTRY;
1530
1531   /* add, not in table */
1532   vec_add2 (am->proxy_arps, pa, 1);
1533   pa->lo_addr = lo_addr->as_u32;
1534   pa->hi_addr = hi_addr->as_u32;
1535   pa->fib_index = fib_index;
1536   return 0;
1537 }
1538
1539 /*
1540  * Remove any proxy arp entries asdociated with the 
1541  * specificed fib.
1542  */
1543 int vnet_proxy_arp_fib_reset (u32 fib_id)
1544 {
1545   ip4_main_t * im = &ip4_main;
1546   ethernet_arp_main_t *am = &ethernet_arp_main;
1547   ethernet_proxy_arp_t *pa;
1548   u32 * entries_to_delete = 0;
1549   u32 fib_index;
1550   uword * p;
1551   int i;
1552
1553   p = hash_get (im->fib_index_by_table_id, fib_id);
1554   if (! p)
1555       return VNET_API_ERROR_NO_SUCH_ENTRY;
1556   fib_index = p[0];
1557
1558   vec_foreach (pa, am->proxy_arps)
1559     {
1560       if (pa->fib_index == fib_index)
1561         {
1562           vec_add1 (entries_to_delete, pa - am->proxy_arps);
1563         }
1564     }
1565
1566   for (i = 0; i < vec_len(entries_to_delete); i++)
1567     {
1568        vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1569     } 
1570
1571   vec_free (entries_to_delete);
1572
1573    return 0;
1574 }
1575
1576 u32
1577 vnet_arp_glean_add(u32 fib_index, void * next_hop_arg)
1578 {
1579   ethernet_arp_main_t * am = &ethernet_arp_main;
1580   ip4_main_t * im = &ip4_main;
1581   ip_lookup_main_t * lm = &im->lookup_main;
1582   ip4_address_t * next_hop = next_hop_arg;
1583   ip_adjacency_t add_adj, *adj;
1584   ip4_add_del_route_args_t args;
1585   ethernet_arp_ip4_entry_t * e;
1586   ethernet_arp_ip4_key_t k;
1587   u32 adj_index;
1588
1589   adj_index = ip4_fib_lookup_with_table(im, fib_index, next_hop, 0);
1590   adj = ip_get_adjacency(lm, adj_index);
1591
1592   if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP)
1593     return ~0;
1594
1595   if (adj->arp.next_hop.ip4.as_u32 != 0)
1596     return adj_index;
1597
1598   k.sw_if_index = adj->rewrite_header.sw_if_index;
1599   k.fib_index = fib_index;
1600   k.ip4_address.as_u32 = next_hop->as_u32;
1601
1602   if (mhash_get (&am->ip4_entry_by_key, &k))
1603     return adj_index;
1604
1605   pool_get (am->ip4_entry_pool, e);
1606   mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, /* old value */ 0);
1607   e->key = k;
1608   e->cpu_time_last_updated = clib_cpu_time_now ();
1609   e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN;
1610
1611   memset(&args, 0, sizeof(args));
1612   clib_memcpy(&add_adj, adj, sizeof(add_adj));
1613   ip46_address_set_ip4(&add_adj.arp.next_hop, next_hop); /* install neighbor /32 route */
1614   args.table_index_or_table_id = fib_index;
1615   args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD| IP4_ROUTE_FLAG_NEIGHBOR;
1616   args.dst_address.as_u32 = next_hop->as_u32;
1617   args.dst_address_length = 32;
1618   args.adj_index = ~0;
1619   args.add_adj = &add_adj;
1620   args.n_add_adj = 1;
1621   ip4_add_del_route (im, &args);
1622   return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0);
1623 }
1624
1625 static clib_error_t *
1626 ip_arp_add_del_command_fn (vlib_main_t * vm,
1627                  unformat_input_t * input,
1628                  vlib_cli_command_t * cmd)
1629 {
1630   vnet_main_t * vnm = vnet_get_main();
1631   u32 sw_if_index;
1632   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1633   int addr_valid = 0;
1634   int is_del = 0;
1635   int count = 1;
1636   u32 fib_index = 0;
1637   u32 fib_id;
1638   int is_static = 0;
1639   int is_proxy = 0;
1640
1641   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1642     {
1643       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1644       if (unformat (input, "%U %U %U",
1645                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1646                     unformat_ip4_address, &addr.ip4, 
1647                     unformat_ethernet_address, &addr.ethernet))
1648         addr_valid = 1;
1649
1650       else if (unformat (input, "delete") || unformat (input, "del"))
1651         is_del = 1;
1652
1653       else if (unformat (input, "static"))
1654         is_static = 1;
1655
1656       else if (unformat (input, "count %d", &count))
1657         ;
1658
1659       else if (unformat (input, "fib-id %d", &fib_id))
1660         {
1661           ip4_main_t * im = &ip4_main;
1662           uword * p = hash_get (im->fib_index_by_table_id, fib_id);
1663           if (! p)
1664             return clib_error_return (0, "fib ID %d doesn't exist\n",
1665                                       fib_id);
1666           fib_index = p[0];
1667         }
1668
1669       else if (unformat (input, "proxy %U - %U", 
1670                          unformat_ip4_address, &lo_addr.ip4, 
1671                          unformat_ip4_address, &hi_addr.ip4))
1672         is_proxy = 1;
1673       else
1674         break;
1675     }
1676   
1677   if (is_proxy)
1678     {
1679       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, 
1680                                      fib_index, is_del);
1681       return 0;
1682     }
1683
1684   if (addr_valid) 
1685     {
1686       int i;
1687
1688       for (i = 0; i < count; i++) 
1689         {
1690           if (is_del == 0) 
1691             {
1692               uword event_type, * event_data = 0;
1693
1694               /* Park the debug CLI until the arp entry is installed */
1695               vnet_register_ip4_arp_resolution_event 
1696                 (vnm, &addr.ip4, vlib_current_process(vm),
1697                  1 /* type */, 0 /* data */);
1698               
1699               vnet_arp_set_ip4_over_ethernet
1700                 (vnm, sw_if_index, fib_index, &addr, is_static);
1701               
1702               vlib_process_wait_for_event (vm);
1703               event_type = vlib_process_get_events (vm, &event_data);
1704               vec_reset_length(event_data);
1705               if (event_type != 1)
1706                 clib_warning ("event type %d unexpected", event_type);
1707             }
1708           else
1709             vnet_arp_unset_ip4_over_ethernet
1710                 (vnm, sw_if_index, fib_index, &addr);
1711
1712           increment_ip4_and_mac_address (&addr);
1713         }
1714     }
1715   else
1716     {
1717       return clib_error_return (0, "unknown input `%U'",
1718                                 format_unformat_error, input);
1719     }
1720   
1721   return 0;
1722 }
1723
1724 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1725     .path = "set ip arp",
1726     .short_help = "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1727     .function = ip_arp_add_del_command_fn,
1728 };
1729
1730 static clib_error_t *
1731 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1732                               unformat_input_t * input,
1733                               vlib_cli_command_t * cmd)
1734 {
1735   vnet_main_t * vnm = vnet_get_main();
1736   u32 sw_if_index;
1737   vnet_sw_interface_t * si;
1738   int enable = 0;
1739   int intfc_set = 0;
1740
1741   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
1742     {
1743       if (unformat (input, "%U", unformat_vnet_sw_interface, 
1744                     vnm, &sw_if_index))
1745         intfc_set = 1;
1746       else if (unformat (input, "enable") || unformat (input, "on"))
1747         enable = 1;
1748       else if (unformat (input, "disable") || unformat (input, "off"))
1749         enable = 0;
1750       else
1751         break;
1752     }
1753
1754   if (intfc_set == 0)
1755     return clib_error_return (0, "unknown input '%U'",
1756                               format_unformat_error, input);
1757
1758   si = vnet_get_sw_interface (vnm, sw_if_index);
1759   ASSERT(si);
1760   if (enable)
1761     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1762   else 
1763     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
1764   
1765   return 0;
1766 }
1767
1768 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
1769     .path = "set interface proxy-arp",
1770     .short_help = "set interface proxy-arp <intfc> [enable|disable]",
1771     .function = set_int_proxy_arp_command_fn,
1772 };
1773
1774
1775 /*
1776  * ARP Termination in a L2 Bridge Domain based on an
1777  * IP4 to MAC hash table mac_by_ip4 for each BD.
1778  */
1779 typedef enum {
1780   ARP_TERM_NEXT_L2_OUTPUT,
1781   ARP_TERM_NEXT_DROP,
1782   ARP_TERM_N_NEXT,
1783 } arp_term_next_t;
1784
1785 u32 arp_term_next_node_index[32];
1786
1787 static uword
1788 arp_term_l2bd (vlib_main_t * vm,
1789                vlib_node_runtime_t * node,
1790                vlib_frame_t * frame)
1791 {
1792   l2input_main_t * l2im = &l2input_main;
1793   u32 n_left_from, next_index, * from, * to_next;
1794   u32 n_replies_sent = 0;
1795   u16 last_bd_index = ~0;
1796   l2_bridge_domain_t * last_bd_config = 0;
1797   l2_input_config_t * cfg0;
1798
1799   from = vlib_frame_vector_args (frame);
1800   n_left_from = frame->n_vectors;
1801   next_index = node->cached_next_index;
1802
1803   while (n_left_from > 0)
1804     {
1805       u32 n_left_to_next;
1806
1807       vlib_get_next_frame (vm, node, next_index,
1808                            to_next, n_left_to_next);
1809
1810       while (n_left_from > 0 && n_left_to_next > 0)
1811         {
1812           vlib_buffer_t * p0;
1813           ethernet_header_t * eth0;
1814           ethernet_arp_header_t * arp0;
1815           u8 * l3h0;
1816           u32 pi0, error0, next0, sw_if_index0;
1817           u16 ethertype0;
1818           u16 bd_index0;
1819           u32 ip0;
1820           u8 * macp0;
1821
1822           pi0 = from[0];
1823           to_next[0] = pi0;
1824           from += 1;
1825           to_next += 1;
1826           n_left_from -= 1;
1827           n_left_to_next -= 1;
1828
1829           p0 = vlib_get_buffer (vm, pi0);
1830           eth0 = vlib_buffer_get_current (p0);
1831           l3h0 = (u8 *)eth0 + vnet_buffer(p0)->l2.l2_len;
1832           ethertype0 = clib_net_to_host_u16(*(u16 *)(l3h0 - 2));
1833           arp0 = (ethernet_arp_header_t *) l3h0;
1834
1835           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
1836                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
1837             {
1838               u8 *t0 = vlib_add_trace (
1839                   vm, node, p0, sizeof(ethernet_arp_input_trace_t));
1840               clib_memcpy (t0, l3h0, sizeof(ethernet_arp_input_trace_t));
1841             }
1842
1843           if (PREDICT_FALSE  (
1844             (ethertype0 != ETHERNET_TYPE_ARP) ||
1845             (arp0->opcode != clib_host_to_net_u16(ETHERNET_ARP_OPCODE_request))))
1846             goto next_l2_feature;
1847
1848           error0 = ETHERNET_ARP_ERROR_replies_sent;
1849           error0 = (arp0->l2_type != clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
1850                     ? ETHERNET_ARP_ERROR_l2_type_not_ethernet
1851                     : error0);
1852           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4)
1853                     ? ETHERNET_ARP_ERROR_l3_type_not_ip4
1854                     : error0);
1855
1856           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1857
1858           if (error0)
1859             goto drop;
1860
1861           // Trash ARP packets whose ARP-level source addresses do not
1862           // match their L2-frame-level source addresses */
1863           if (PREDICT_FALSE (
1864             memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1865                     sizeof (eth0->src_address))))
1866             {
1867               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1868               goto drop;
1869             }
1870
1871           // Check if anyone want ARP request events for L2 BDs
1872           {
1873           pending_resolution_t * mc;
1874           ethernet_arp_main_t * am = &ethernet_arp_main;
1875           uword *p = hash_get (am->mac_changes_by_address, 0);
1876           if (p && (vnet_buffer(p0)->l2.shg == 0))
1877             { // Only SHG 0 interface which is more likely local
1878               u32 next_index = p[0];
1879               while (next_index != (u32)~0)
1880                 {
1881                   int (*fp)(u32, u8 *, u32, u32);
1882                   int rv = 1;
1883                   mc = pool_elt_at_index (am->mac_changes, next_index);
1884                   fp = mc->data_callback;
1885                   // Call the callback, return 1 to suppress dup events */
1886                   if (fp) rv = (*fp)(mc->data, 
1887                                      arp0->ip4_over_ethernet[0].ethernet, 
1888                                      sw_if_index0, 
1889                                      arp0->ip4_over_ethernet[0].ip4.as_u32);
1890                   // Signal the resolver process
1891                   if (rv == 0)
1892                     vlib_process_signal_event (vm, mc->node_index,
1893                                                mc->type_opaque, 
1894                                                mc->data);
1895                   next_index = mc->next_index;
1896                 }
1897             }
1898           }
1899
1900           // lookup BD mac_by_ip4 hash table for MAC entry
1901           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
1902           bd_index0 = vnet_buffer(p0)->l2.bd_index;
1903           if (PREDICT_FALSE (
1904             (bd_index0 != last_bd_index) || (last_bd_index == (u16) ~0)))
1905             {
1906               last_bd_index = bd_index0;
1907               last_bd_config = vec_elt_at_index(l2im->bd_configs, bd_index0);
1908             }
1909           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
1910
1911           if (PREDICT_FALSE(!macp0)) 
1912               goto next_l2_feature;     // MAC not found 
1913
1914           // MAC found, send ARP reply -
1915           // Convert ARP request packet to ARP reply
1916           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1917           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1918           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
1919           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
1920           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1921           clib_memcpy (eth0->src_address, macp0, 6);
1922           n_replies_sent += 1;
1923
1924           // For BVI, need to use l2-fwd node to send ARP reply as 
1925           // l2-output node cannot output packet to BVI properly
1926           cfg0 = vec_elt_at_index(l2im->configs, sw_if_index0);
1927           if (PREDICT_FALSE (cfg0->bvi))
1928             {
1929               vnet_buffer(p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
1930               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
1931               goto next_l2_feature;
1932             }
1933
1934           // Send ARP reply back out input interface through l2-output
1935           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1936           next0 = ARP_TERM_NEXT_L2_OUTPUT;
1937           // Note that output to VXLAN tunnel will fail due to SHG which
1938           // is probably desireable since ARP termination is not intended
1939           // for ARP requests from other hosts. If output to VXLAN tunnel is
1940           // required, however, can just clear the SHG in packet as follows:
1941           //   vnet_buffer(p0)->l2.shg = 0;
1942
1943           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1944                                            n_left_to_next,pi0,next0);
1945           continue;
1946
1947         next_l2_feature:
1948           {
1949             u32 feature_bitmap0 =
1950                 vnet_buffer(p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
1951             vnet_buffer(p0)->l2.feature_bitmap = feature_bitmap0;
1952             next0 = feat_bitmap_get_next_node_index(arp_term_next_node_index,
1953                                                     feature_bitmap0);
1954             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1955                                              n_left_to_next,pi0,next0);
1956             continue;
1957           }
1958
1959         drop:
1960           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1961               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1962                arp0->ip4_over_ethernet[1].ip4.as_u32))
1963             {
1964               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1965             }
1966           next0 = ARP_TERM_NEXT_DROP;
1967           p0->error = node->errors[error0];
1968
1969           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,to_next,
1970                                            n_left_to_next,pi0,next0);
1971         }
1972
1973       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1974     }
1975
1976   vlib_error_count (vm, node->node_index,
1977                     ETHERNET_ARP_ERROR_replies_sent, 
1978                     n_replies_sent);
1979   return frame->n_vectors;
1980 }
1981
1982 VLIB_REGISTER_NODE (arp_term_l2bd_node,static) = {
1983   .function = arp_term_l2bd,
1984   .name = "arp-term-l2bd",
1985   .vector_size = sizeof (u32),
1986
1987   .n_errors = ETHERNET_ARP_N_ERROR,
1988   .error_strings = ethernet_arp_error_strings,
1989
1990   .n_next_nodes = ARP_TERM_N_NEXT,
1991   .next_nodes = {
1992     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
1993     [ARP_TERM_NEXT_DROP] = "error-drop",
1994   },
1995
1996   .format_buffer = format_ethernet_arp_header,
1997   .format_trace = format_ethernet_arp_input_trace,
1998 };
1999
2000 clib_error_t *arp_term_init (vlib_main_t *vm)
2001 { // Initialize the feature next-node indexes 
2002   feat_bitmap_init_next_nodes(vm,
2003                               arp_term_l2bd_node.index,
2004                               L2INPUT_N_FEAT,
2005                               l2input_get_feat_names(),
2006                               arp_term_next_node_index);
2007   return 0;
2008 }
2009
2010 VLIB_INIT_FUNCTION (arp_term_init);