Reorganize source tree to use single autotools instance
[vpp.git] / src / vnet / ethernet / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/ip/ip.h>
19 #include <vnet/ip/ip6.h>
20 #include <vnet/ethernet/ethernet.h>
21 #include <vnet/ethernet/arp_packet.h>
22 #include <vnet/l2/l2_input.h>
23 #include <vppinfra/mhash.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/adj/adj_nbr.h>
26 #include <vnet/mpls/mpls.h>
27
28 /**
29  * @file
30  * @brief IPv4 ARP.
31  *
32  * This file contains code to manage the IPv4 ARP tables (IP Address
33  * to MAC Address lookup).
34  */
35
36
/*
 * Prototype only; the definition is not in the part of the file shown here.
 * NOTE(review): presumably asks the main thread to run 'fp' on a copy of
 * 'data' (data_length bytes) - confirm against the definition.
 */
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
38
39 /**
40  * @brief Per-interface ARP configuration and state
41  */
typedef struct ethernet_arp_interface_t_
{
  /**
   * Hash table of ARP entries.
   * Since this hash table is per-interface, the key is only the IPv4 address
   * (as a u32). The value is the entry's index in
   * ethernet_arp_main_t::ip4_entry_pool. The table is created lazily, so it
   * is NULL until the first entry is added on the interface.
   */
  uword *arp_entries;
} ethernet_arp_interface_t;
50
/**
 * @brief One proxy-ARP configuration record (element of
 * ethernet_arp_main_t::proxy_arps).
 *
 * NOTE(review): lo_addr/hi_addr look like the bounds of an IPv4 address
 * range and fib_index the table the range applies to; byte order and
 * inclusiveness are not visible here - confirm against the users.
 */
typedef struct
{
  u32 lo_addr;
  u32 hi_addr;
  u32 fib_index;
} ethernet_proxy_arp_t;
57
/**
 * @brief A client waiting to be told about an IPv4 address.
 *
 * Used both for one-shot "address resolved" waiters and for persistent
 * "MAC changed" subscriptions. Records for the same address form a
 * singly-linked list through next_index.
 */
typedef struct
{
  /* Pool index of the next record for the same address, ~0 ends the list */
  u32 next_index;
  /* Process node to signal, with the event type and data to deliver */
  uword node_index;
  uword type_opaque;
  uword data;
  /* Used for arp event notification only */
  void *data_callback;
  /* Client identifier; used to match a subscription when it is deleted */
  u32 pid;
} pending_resolution_t;
68
/**
 * @brief Global ARP state; the single instance is ethernet_arp_main.
 */
typedef struct
{
  /* Hash tables mapping name to opcode. */
  uword *opcode_by_name;

  /*
   * One-shot waiters for address resolution. The hash is keyed by IPv4
   * address (as a u32) and yields the pool index of the first record in a
   * list of pending_resolutions.
   */
  uword *pending_resolutions_by_address;
  pending_resolution_t *pending_resolutions;

  /* Mac address change notification; same layout as the waiters above */
  uword *mac_changes_by_address;
  pending_resolution_t *mac_changes;

  /* Pool holding every ARP entry, for all interfaces */
  ethernet_arp_ip4_entry_t *ip4_entry_pool;

  /* ARP attack mitigation */
  /* Pool index at which the previous entry eviction stopped */
  u32 arp_delete_rotor;
  /* NOTE(review): presumably the maximum number of cached entries - confirm */
  u32 limit_arp_cache_size;

  /** Per interface state */
  ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;

  /* Proxy arp vector */
  ethernet_proxy_arp_t *proxy_arps;
} ethernet_arp_main_t;

static ethernet_arp_main_t ethernet_arp_main;
96
/**
 * @brief Arguments describing one ARP entry operation.
 */
typedef struct
{
  /* Interface the entry belongs to */
  u32 sw_if_index;
  /* IPv4 address and ethernet address of the entry */
  ethernet_arp_ip4_over_ethernet_address_t a;
  /* Non-zero when the entry is static rather than learned */
  int is_static;
  /*
   * Operation selector, built from the bits below.
   * NOTE(review): the bits are not interpreted in the part of the file
   * shown here; their names suggest remove / flush / populate.
   */
  int flags;
#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
#define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
#define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;

/* Forward declaration; the definition appears later in the file */
static void
set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                    * a);
111
112 static u8 *
113 format_ethernet_arp_hardware_type (u8 * s, va_list * va)
114 {
115   ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t);
116   char *t = 0;
117   switch (h)
118     {
119 #define _(n,f) case n: t = #f; break;
120       foreach_ethernet_arp_hardware_type;
121 #undef _
122
123     default:
124       return format (s, "unknown 0x%x", h);
125     }
126
127   return format (s, "%s", t);
128 }
129
130 static u8 *
131 format_ethernet_arp_opcode (u8 * s, va_list * va)
132 {
133   ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t);
134   char *t = 0;
135   switch (o)
136     {
137 #define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break;
138       foreach_ethernet_arp_opcode;
139 #undef _
140
141     default:
142       return format (s, "unknown 0x%x", o);
143     }
144
145   return format (s, "%s", t);
146 }
147
148 static uword
149 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
150                                               va_list * args)
151 {
152   int *result = va_arg (*args, int *);
153   ethernet_arp_main_t *am = &ethernet_arp_main;
154   int x, i;
155
156   /* Numeric opcode. */
157   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
158     {
159       if (x >= (1 << 16))
160         return 0;
161       *result = x;
162       return 1;
163     }
164
165   /* Named type. */
166   if (unformat_user (input, unformat_vlib_number_by_name,
167                      am->opcode_by_name, &i))
168     {
169       *result = i;
170       return 1;
171     }
172
173   return 0;
174 }
175
176 static uword
177 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
178                                              va_list * args)
179 {
180   int *result = va_arg (*args, int *);
181   if (!unformat_user
182       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
183     return 0;
184
185   *result = clib_host_to_net_u16 ((u16) * result);
186   return 1;
187 }
188
189 static u8 *
190 format_ethernet_arp_header (u8 * s, va_list * va)
191 {
192   ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
193   u32 max_header_bytes = va_arg (*va, u32);
194   uword indent;
195   u16 l2_type, l3_type;
196
197   if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
198     return format (s, "ARP header truncated");
199
200   l2_type = clib_net_to_host_u16 (a->l2_type);
201   l3_type = clib_net_to_host_u16 (a->l3_type);
202
203   indent = format_get_indent (s);
204
205   s = format (s, "%U, type %U/%U, address size %d/%d",
206               format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
207               format_ethernet_arp_hardware_type, l2_type,
208               format_ethernet_type, l3_type,
209               a->n_l2_address_bytes, a->n_l3_address_bytes);
210
211   if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
212       && l3_type == ETHERNET_TYPE_IP4)
213     {
214       s = format (s, "\n%U%U/%U -> %U/%U",
215                   format_white_space, indent,
216                   format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
217                   format_ip4_address, &a->ip4_over_ethernet[0].ip4,
218                   format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
219                   format_ip4_address, &a->ip4_over_ethernet[1].ip4);
220     }
221   else
222     {
223       uword n2 = a->n_l2_address_bytes;
224       uword n3 = a->n_l3_address_bytes;
225       s = format (s, "\n%U%U/%U -> %U/%U",
226                   format_white_space, indent,
227                   format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
228                   format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
229                   format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
230                   format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
231     }
232
233   return s;
234 }
235
236 u8 *
237 format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
238 {
239   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
240   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
241   vnet_sw_interface_t *si;
242   u8 *flags = 0;
243
244   if (!e)
245     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
246                    "Flags", "Ethernet", "Interface");
247
248   si = vnet_get_sw_interface (vnm, e->sw_if_index);
249
250   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
251     flags = format (flags, "S");
252
253   if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
254     flags = format (flags, "D");
255
256   s = format (s, "%=12U%=16U%=6s%=20U%=24U",
257               format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
258               format_ip4_address, &e->ip4_address,
259               flags ? (char *) flags : "",
260               format_ethernet_address, e->ethernet_address,
261               format_vnet_sw_interface_name, vnm, si);
262
263   vec_free (flags);
264   return s;
265 }
266
/**
 * @brief Per-packet trace record: the leading bytes of the traced packet.
 */
typedef struct
{
  u8 packet_data[64];
} ethernet_arp_input_trace_t;
271
272 static u8 *
273 format_ethernet_arp_input_trace (u8 * s, va_list * va)
274 {
275   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
276   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
277   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
278
279   s = format (s, "%U",
280               format_ethernet_arp_header,
281               t->packet_data, sizeof (t->packet_data));
282
283   return s;
284 }
285
286 static u8 *
287 format_arp_term_input_trace (u8 * s, va_list * va)
288 {
289   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
290   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
291   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
292
293   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
294      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
295      - for ip6, the first nibble has value of 6. */
296   s = format (s, "%U", t->packet_data[0] == 0 ?
297               format_ethernet_arp_header : format_ip6_header,
298               t->packet_data, sizeof (t->packet_data));
299
300   return s;
301 }
302
303 static void
304 arp_nbr_probe (ip_adjacency_t * adj)
305 {
306   vnet_main_t *vnm = vnet_get_main ();
307   ip4_main_t *im = &ip4_main;
308   ip_interface_address_t *ia;
309   ethernet_arp_header_t *h;
310   vnet_hw_interface_t *hi;
311   vnet_sw_interface_t *si;
312   ip4_address_t *src;
313   vlib_buffer_t *b;
314   vlib_main_t *vm;
315   u32 bi = 0;
316
317   vm = vlib_get_main ();
318
319   si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
320
321   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
322     {
323       return;
324     }
325
326   src =
327     ip4_interface_address_matching_destination (im,
328                                                 &adj->sub_type.nbr.next_hop.
329                                                 ip4,
330                                                 adj->rewrite_header.
331                                                 sw_if_index, &ia);
332   if (!src)
333     {
334       return;
335     }
336
337   h =
338     vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
339                                      &bi);
340
341   hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
342
343   clib_memcpy (h->ip4_over_ethernet[0].ethernet,
344                hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
345
346   h->ip4_over_ethernet[0].ip4 = src[0];
347   h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
348
349   b = vlib_get_buffer (vm, bi);
350   vnet_buffer (b)->sw_if_index[VLIB_RX] =
351     vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
352
353   /* Add encapsulation string for software interface (e.g. ethernet header). */
354   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
355   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
356
357   {
358     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
359     u32 *to_next = vlib_frame_vector_args (f);
360     to_next[0] = bi;
361     f->n_vectors = 1;
362     vlib_put_frame_to_node (vm, hi->output_node_index, f);
363   }
364 }
365
366 static void
367 arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
368 {
369   adj_nbr_update_rewrite
370     (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
371      ethernet_build_rewrite (vnet_get_main (),
372                              e->sw_if_index,
373                              adj_get_link_type (ai), e->ethernet_address));
374 }
375
376 static void
377 arp_mk_incomplete (adj_index_t ai)
378 {
379   ip_adjacency_t *adj = adj_get (ai);
380
381   adj_nbr_update_rewrite
382     (ai,
383      ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
384      ethernet_build_rewrite (vnet_get_main (),
385                              adj->rewrite_header.sw_if_index,
386                              VNET_LINK_ARP,
387                              VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
388 }
389
390 static ethernet_arp_ip4_entry_t *
391 arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
392 {
393   ethernet_arp_main_t *am = &ethernet_arp_main;
394   ethernet_arp_ip4_entry_t *e = NULL;
395   uword *p;
396
397   if (NULL != eai->arp_entries)
398     {
399       p = hash_get (eai->arp_entries, addr->as_u32);
400       if (!p)
401         return (NULL);
402
403       e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
404     }
405
406   return (e);
407 }
408
409 static adj_walk_rc_t
410 arp_mk_complete_walk (adj_index_t ai, void *ctx)
411 {
412   ethernet_arp_ip4_entry_t *e = ctx;
413
414   arp_mk_complete (ai, e);
415
416   return (ADJ_WALK_RC_CONTINUE);
417 }
418
419 static adj_walk_rc_t
420 arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
421 {
422   arp_mk_incomplete (ai);
423
424   return (ADJ_WALK_RC_CONTINUE);
425 }
426
427 void
428 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
429 {
430   ethernet_arp_main_t *am = &ethernet_arp_main;
431   ethernet_arp_interface_t *arp_int;
432   ethernet_arp_ip4_entry_t *e;
433   ip_adjacency_t *adj;
434
435   adj = adj_get (ai);
436
437   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
438   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
439   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
440
441   if (NULL != e)
442     {
443       adj_nbr_walk_nh4 (sw_if_index,
444                         &e->ip4_address, arp_mk_complete_walk, e);
445     }
446   else
447     {
448       /*
449        * no matching ARP entry.
450        * construct the rewire required to for an ARP packet, and stick
451        * that in the adj's pipe to smoke.
452        */
453       adj_nbr_update_rewrite (ai,
454                               ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
455                               ethernet_build_rewrite (vnm,
456                                                       sw_if_index,
457                                                       VNET_LINK_ARP,
458                                                       VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
459
460       /*
461        * since the FIB has added this adj for a route, it makes sense it may
462        * want to forward traffic sometime soon. Let's send a speculative ARP.
463        * just one. If we were to do periodically that wouldn't be bad either,
464        * but that's more code than i'm prepared to write at this time for
465        * relatively little reward.
466        */
467       arp_nbr_probe (adj);
468     }
469 }
470
471 int
472 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
473                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
474                                          * args)
475 {
476   ethernet_arp_ip4_entry_t *e = 0;
477   ethernet_arp_main_t *am = &ethernet_arp_main;
478   ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
479   vlib_main_t *vm = vlib_get_main ();
480   int make_new_arp_cache_entry = 1;
481   uword *p;
482   pending_resolution_t *pr, *mc;
483   ethernet_arp_interface_t *arp_int;
484   int is_static = args->is_static;
485   u32 sw_if_index = args->sw_if_index;
486
487   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
488
489   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
490
491   if (NULL != arp_int->arp_entries)
492     {
493       p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
494       if (p)
495         {
496           e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
497
498           /* Refuse to over-write static arp. */
499           if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
500             return -2;
501           make_new_arp_cache_entry = 0;
502         }
503     }
504
505   if (make_new_arp_cache_entry)
506     {
507       fib_prefix_t pfx = {
508         .fp_len = 32,
509         .fp_proto = FIB_PROTOCOL_IP4,
510         .fp_addr = {
511                     .ip4 = a->ip4,
512                     }
513         ,
514       };
515       u32 fib_index;
516
517       pool_get (am->ip4_entry_pool, e);
518
519       if (NULL == arp_int->arp_entries)
520         {
521           arp_int->arp_entries = hash_create (0, sizeof (u32));
522         }
523
524       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
525
526       e->sw_if_index = sw_if_index;
527       e->ip4_address = a->ip4;
528       clib_memcpy (e->ethernet_address,
529                    a->ethernet, sizeof (e->ethernet_address));
530
531       fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
532       e->fib_entry_index =
533         fib_table_entry_update_one_path (fib_index,
534                                          &pfx,
535                                          FIB_SOURCE_ADJ,
536                                          FIB_ENTRY_FLAG_ATTACHED,
537                                          FIB_PROTOCOL_IP4,
538                                          &pfx.fp_addr,
539                                          e->sw_if_index,
540                                          ~0,
541                                          1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
542     }
543   else
544     {
545       /*
546        * prevent a DoS attack from the data-plane that
547        * spams us with no-op updates to the MAC address
548        */
549       if (0 == memcmp (e->ethernet_address,
550                        a->ethernet, sizeof (e->ethernet_address)))
551         return -1;
552
553       /* Update time stamp and ethernet address. */
554       clib_memcpy (e->ethernet_address, a->ethernet,
555                    sizeof (e->ethernet_address));
556     }
557
558   e->cpu_time_last_updated = clib_cpu_time_now ();
559   if (is_static)
560     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
561   else
562     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
563
564   adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
565
566   /* Customer(s) waiting for this address to be resolved? */
567   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
568   if (p)
569     {
570       u32 next_index;
571       next_index = p[0];
572
573       while (next_index != (u32) ~ 0)
574         {
575           pr = pool_elt_at_index (am->pending_resolutions, next_index);
576           vlib_process_signal_event (vm, pr->node_index,
577                                      pr->type_opaque, pr->data);
578           next_index = pr->next_index;
579           pool_put (am->pending_resolutions, pr);
580         }
581
582       hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
583     }
584
585   /* Customer(s) requesting ARP event for this address? */
586   p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
587   if (p)
588     {
589       u32 next_index;
590       next_index = p[0];
591
592       while (next_index != (u32) ~ 0)
593         {
594           int (*fp) (u32, u8 *, u32, u32);
595           int rv = 1;
596           mc = pool_elt_at_index (am->mac_changes, next_index);
597           fp = mc->data_callback;
598
599           /* Call the user's data callback, return 1 to suppress dup events */
600           if (fp)
601             rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
602
603           /*
604            * Signal the resolver process, as long as the user
605            * says they want to be notified
606            */
607           if (rv == 0)
608             vlib_process_signal_event (vm, mc->node_index,
609                                        mc->type_opaque, mc->data);
610           next_index = mc->next_index;
611         }
612     }
613
614   return 0;
615 }
616
617 void
618 vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
619                                         void *address_arg,
620                                         uword node_index,
621                                         uword type_opaque, uword data)
622 {
623   ethernet_arp_main_t *am = &ethernet_arp_main;
624   ip4_address_t *address = address_arg;
625   uword *p;
626   pending_resolution_t *pr;
627
628   pool_get (am->pending_resolutions, pr);
629
630   pr->next_index = ~0;
631   pr->node_index = node_index;
632   pr->type_opaque = type_opaque;
633   pr->data = data;
634   pr->data_callback = 0;
635
636   p = hash_get (am->pending_resolutions_by_address, address->as_u32);
637   if (p)
638     {
639       /* Insert new resolution at the head of the list */
640       pr->next_index = p[0];
641       hash_unset (am->pending_resolutions_by_address, address->as_u32);
642     }
643
644   hash_set (am->pending_resolutions_by_address, address->as_u32,
645             pr - am->pending_resolutions);
646 }
647
648 int
649 vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
650                                    void *data_callback,
651                                    u32 pid,
652                                    void *address_arg,
653                                    uword node_index,
654                                    uword type_opaque, uword data, int is_add)
655 {
656   ethernet_arp_main_t *am = &ethernet_arp_main;
657   ip4_address_t *address = address_arg;
658   uword *p;
659   pending_resolution_t *mc;
660   void (*fp) (u32, u8 *) = data_callback;
661
662   if (is_add)
663     {
664       pool_get (am->mac_changes, mc);
665
666       mc->next_index = ~0;
667       mc->node_index = node_index;
668       mc->type_opaque = type_opaque;
669       mc->data = data;
670       mc->data_callback = data_callback;
671       mc->pid = pid;
672
673       p = hash_get (am->mac_changes_by_address, address->as_u32);
674       if (p)
675         {
676           /* Insert new resolution at the head of the list */
677           mc->next_index = p[0];
678           hash_unset (am->mac_changes_by_address, address->as_u32);
679         }
680
681       hash_set (am->mac_changes_by_address, address->as_u32,
682                 mc - am->mac_changes);
683       return 0;
684     }
685   else
686     {
687       u32 index;
688       pending_resolution_t *mc_last = 0;
689
690       p = hash_get (am->mac_changes_by_address, address->as_u32);
691       if (p == 0)
692         return VNET_API_ERROR_NO_SUCH_ENTRY;
693
694       index = p[0];
695
696       while (index != (u32) ~ 0)
697         {
698           mc = pool_elt_at_index (am->mac_changes, index);
699           if (mc->node_index == node_index &&
700               mc->type_opaque == type_opaque && mc->pid == pid)
701             {
702               /* Clients may need to clean up pool entries, too */
703               if (fp)
704                 (*fp) (mc->data, 0 /* no new mac addrs */ );
705               if (index == p[0])
706                 {
707                   hash_unset (am->mac_changes_by_address, address->as_u32);
708                   if (mc->next_index != ~0)
709                     hash_set (am->mac_changes_by_address, address->as_u32,
710                               mc->next_index);
711                   pool_put (am->mac_changes, mc);
712                   return 0;
713                 }
714               else
715                 {
716                   ASSERT (mc_last);
717                   mc_last->next_index = mc->next_index;
718                   pool_put (am->mac_changes, mc);
719                   return 0;
720                 }
721             }
722           mc_last = mc;
723           index = mc->next_index;
724         }
725
726       return VNET_API_ERROR_NO_SUCH_ENTRY;
727     }
728 }
729
/*
 * Next-node dispositions for the ARP input node:
 * either we drop the packet or we send a reply to the sender.
 * ARP_INPUT_N_NEXT is the number of dispositions, not a disposition itself.
 */
typedef enum
{
  ARP_INPUT_NEXT_DROP,
  ARP_INPUT_NEXT_REPLY_TX,
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
737
/*
 * List of ARP counters / error reasons, as (symbol, description) pairs.
 * Expand it by defining _(sym, string) before use, as the enum below does.
 */
#define foreach_ethernet_arp_error                                      \
  _ (replies_sent, "ARP replies sent")                                  \
  _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
  _ (l3_type_not_ip4, "L3 type not IP4")                                \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
  _ (replies_received, "ARP replies received")                          \
  _ (opcode_not_request, "ARP opcode not request")                      \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (missing_interface_address, "ARP missing interface address") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table") \

/*
 * One ETHERNET_ARP_ERROR_<sym> value per list entry, in list order.
 * ETHERNET_ARP_N_ERROR is the number of entries.
 */
typedef enum
{
#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
  foreach_ethernet_arp_error
#undef _
    ETHERNET_ARP_N_ERROR,
} ethernet_arp_input_error_t;
761
762
763 static void
764 unset_random_arp_entry (void)
765 {
766   ethernet_arp_main_t *am = &ethernet_arp_main;
767   ethernet_arp_ip4_entry_t *e;
768   vnet_main_t *vnm = vnet_get_main ();
769   ethernet_arp_ip4_over_ethernet_address_t delme;
770   u32 index;
771
772   index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
773   am->arp_delete_rotor = index;
774
775   /* Try again from elt 0, could happen if an intfc goes down */
776   if (index == ~0)
777     {
778       index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
779       am->arp_delete_rotor = index;
780     }
781
782   /* Nothing left in the pool */
783   if (index == ~0)
784     return;
785
786   e = pool_elt_at_index (am->ip4_entry_pool, index);
787
788   clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
789   delme.ip4.as_u32 = e->ip4_address.as_u32;
790
791   vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
792 }
793
/**
 * Fan an ARP packet out to every interface that is unnumbered to
 * sw_if_index.
 *
 * For each such interface the ARP sender hardware address is set to that
 * interface's hardware address and an ethernet header (with one or two VLAN
 * tags when the sub-interface has them) is written in front of the ARP
 * header. The original buffer is rewritten in place for the first interface
 * and left for the caller to send; copies for the remaining interfaces are
 * enqueued directly to their output nodes.
 *
 * @param p0          the ARP packet; current data points at the ARP header.
 * @param pi0         buffer index of p0.
 * @param eth0        ethernet header of the packet; its destination address
 *                    is reused for every copy.
 * @param sw_if_index interface supplying the address.
 * @return 0 when no interface is unnumbered to sw_if_index (packet left
 *         untouched), non-zero otherwise.
 */
static int
arp_unnumbered (vlib_buffer_t * p0,
                u32 pi0, ethernet_header_t * eth0, u32 sw_if_index)
{
  vlib_main_t *vm = vlib_get_main ();
  vnet_main_t *vnm = vnet_get_main ();
  vnet_interface_main_t *vim = &vnm->interface_main;
  vnet_sw_interface_t *si;
  vnet_hw_interface_t *hi;
  u32 unnum_src_sw_if_index;
  u32 *broadcast_swifs = 0;
  u32 *buffers = 0;
  u32 n_alloc = 0;
  vlib_buffer_t *b0;
  int i;
  u8 dst_mac_address[6];
  i16 header_size;
  ethernet_arp_header_t *arp0;

  /* Save the dst mac address */
  clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address));

  /* Figure out which sw_if_index supplied the address */
  unnum_src_sw_if_index = sw_if_index;

  /* Track down all users of the unnumbered source */
  /* *INDENT-OFF* */
  pool_foreach (si, vim->sw_interfaces,
  ({
    if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
        (si->unnumbered_sw_if_index == unnum_src_sw_if_index))
      {
        vec_add1 (broadcast_swifs, si->sw_if_index);
      }
  }));
  /* *INDENT-ON* */

  /* If there are no interfaces unnumbered to this interface,
     we are done here. */
  if (0 == vec_len (broadcast_swifs))
    return 0;

  /* Allocate buffering if we need it */
  if (vec_len (broadcast_swifs) > 1)
    {
      /* one extra buffer per interface beyond the first */
      vec_validate (buffers, vec_len (broadcast_swifs) - 2);
      n_alloc = vlib_buffer_alloc (vm, buffers, vec_len (buffers));
      /* keep only the buffers that were actually allocated */
      _vec_len (buffers) = n_alloc;
      for (i = 0; i < n_alloc; i++)
        {
          b0 = vlib_get_buffer (vm, buffers[i]);

          /* xerox (partially built) ARP pkt */
          clib_memcpy (b0->data, p0->data,
                       p0->current_length + p0->current_data);
          b0->current_data = p0->current_data;
          b0->current_length = p0->current_length;
          vnet_buffer (b0)->sw_if_index[VLIB_RX] =
            vnet_buffer (p0)->sw_if_index[VLIB_RX];
        }
    }

  /* Put the original packet first so buffers[i] pairs with broadcast_swifs[i] */
  vec_insert (buffers, 1, 0);
  buffers[0] = pi0;

  for (i = 0; i < vec_len (buffers); i++)
    {
      b0 = vlib_get_buffer (vm, buffers[i]);
      arp0 = vlib_buffer_get_current (b0);

      hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[i]);
      si = vnet_get_sw_interface (vnm, broadcast_swifs[i]);

      /* For decoration, most likely */
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = hi->sw_if_index;

      /* Fix ARP pkt src address */
      clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, hi->hw_address, 6);

      /* Build L2 encaps for this swif */
      header_size = sizeof (ethernet_header_t);
      if (si->sub.eth.flags.one_tag)
        header_size += 4;
      else if (si->sub.eth.flags.two_tags)
        header_size += 8;

      /* Move back so current data points at the start of the L2 header */
      vlib_buffer_advance (b0, -header_size);
      eth0 = vlib_buffer_get_current (b0);

      if (si->sub.eth.flags.one_tag)
        {
          ethernet_vlan_header_t *outer = (void *) (eth0 + 1);

          eth0->type = si->sub.eth.flags.dot1ad ?
            clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
            clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
          outer->priority_cfi_and_id =
            clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
          outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);

        }
      else if (si->sub.eth.flags.two_tags)
        {
          ethernet_vlan_header_t *outer = (void *) (eth0 + 1);
          ethernet_vlan_header_t *inner = (void *) (outer + 1);

          eth0->type = si->sub.eth.flags.dot1ad ?
            clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
            clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
          outer->priority_cfi_and_id =
            clib_host_to_net_u16 (si->sub.eth.outer_vlan_id);
          outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
          inner->priority_cfi_and_id =
            clib_host_to_net_u16 (si->sub.eth.inner_vlan_id);
          inner->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);

        }
      else
        {
          eth0->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
        }

      /* Restore the original dst address, set src address */
      clib_memcpy (eth0->dst_address, dst_mac_address,
                   sizeof (eth0->dst_address));
      clib_memcpy (eth0->src_address, hi->hw_address,
                   sizeof (eth0->src_address));

      /* Transmit replicas */
      if (i > 0)
        {
          vlib_frame_t *f =
            vlib_get_frame_to_node (vm, hi->output_node_index);
          u32 *to_next = vlib_frame_vector_args (f);
          to_next[0] = buffers[i];
          f->n_vectors = 1;
          vlib_put_frame_to_node (vm, hi->output_node_index, f);
        }
    }

  /* The regular path outputs the original pkt.. */
  vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];

  vec_free (broadcast_swifs);
  vec_free (buffers);

  return !0;
}
942
943 static uword
944 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
945 {
946   ethernet_arp_main_t *am = &ethernet_arp_main;
947   vnet_main_t *vnm = vnet_get_main ();
948   ip4_main_t *im4 = &ip4_main;
949   u32 n_left_from, next_index, *from, *to_next;
950   u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
951
952   from = vlib_frame_vector_args (frame);
953   n_left_from = frame->n_vectors;
954   next_index = node->cached_next_index;
955
956   if (node->flags & VLIB_NODE_FLAG_TRACE)
957     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
958                                    /* stride */ 1,
959                                    sizeof (ethernet_arp_input_trace_t));
960
961   while (n_left_from > 0)
962     {
963       u32 n_left_to_next;
964
965       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
966
967       while (n_left_from > 0 && n_left_to_next > 0)
968         {
969           vlib_buffer_t *p0;
970           vnet_hw_interface_t *hw_if0;
971           ethernet_arp_header_t *arp0;
972           ethernet_header_t *eth0;
973           ip_adjacency_t *adj0;
974           ip4_address_t *if_addr0, proxy_src;
975           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
976           u8 is_request0, dst_is_local0, is_unnum0;
977           ethernet_proxy_arp_t *pa;
978           fib_node_index_t dst_fei, src_fei;
979           fib_prefix_t pfx0;
980           fib_entry_flag_t src_flags, dst_flags;
981
982           pi0 = from[0];
983           to_next[0] = pi0;
984           from += 1;
985           to_next += 1;
986           n_left_from -= 1;
987           n_left_to_next -= 1;
988           pa = 0;
989
990           p0 = vlib_get_buffer (vm, pi0);
991           arp0 = vlib_buffer_get_current (p0);
992
993           is_request0 = arp0->opcode
994             == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
995
996           error0 = ETHERNET_ARP_ERROR_replies_sent;
997
998           error0 =
999             (arp0->l2_type !=
1000              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
1001              ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
1002           error0 =
1003             (arp0->l3_type !=
1004              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
1005              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
1006
1007           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1008
1009           if (error0)
1010             goto drop2;
1011
1012           /* Check that IP address is local and matches incoming interface. */
1013           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
1014           if (~0 == fib_index0)
1015             {
1016               error0 = ETHERNET_ARP_ERROR_interface_no_table;
1017               goto drop2;
1018
1019             }
1020           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1021                                           &arp0->ip4_over_ethernet[1].ip4,
1022                                           32);
1023           dst_flags = fib_entry_get_flags_for_source (dst_fei,
1024                                                       FIB_SOURCE_INTERFACE);
1025
1026           conn_sw_if_index0 =
1027             fib_entry_get_resolving_interface_for_source (dst_fei,
1028                                                           FIB_SOURCE_INTERFACE);
1029
1030           if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
1031             {
1032               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
1033               goto drop1;
1034             }
1035
1036           /* Honor unnumbered interface, if any */
1037           is_unnum0 = sw_if_index0 != conn_sw_if_index0;
1038
1039           /* Source must also be local to subnet of matching interface address. */
1040           src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
1041                                           &arp0->ip4_over_ethernet[0].ip4,
1042                                           32);
1043           src_flags = fib_entry_get_flags (src_fei);
1044
1045           if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
1046                 (FIB_ENTRY_FLAG_CONNECTED & src_flags)) ||
1047               sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
1048             {
1049               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
1050               goto drop2;
1051             }
1052
1053           /* Reject requests/replies with our local interface address. */
1054           if (FIB_ENTRY_FLAG_LOCAL & src_flags)
1055             {
1056               error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
1057               goto drop2;
1058             }
1059
1060           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
1061           fib_entry_get_prefix (dst_fei, &pfx0);
1062           if_addr0 = &pfx0.fp_addr.ip4;
1063
1064           /* Fill in ethernet header. */
1065           eth0 = ethernet_buffer_get_header (p0);
1066
1067           /* Trash ARP packets whose ARP-level source addresses do not
1068              match their L2-frame-level source addresses */
1069           if (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
1070                       sizeof (eth0->src_address)))
1071             {
1072               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
1073               goto drop2;
1074             }
1075
1076           /* Learn or update sender's mapping only for requests or unicasts
1077              that don't match local interface address. */
1078           if (ethernet_address_cast (eth0->dst_address) ==
1079               ETHERNET_ADDRESS_UNICAST || is_request0)
1080             {
1081               if (am->limit_arp_cache_size &&
1082                   pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
1083                 unset_random_arp_entry ();
1084
1085               vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0,
1086                                               &arp0->ip4_over_ethernet[0],
1087                                               0 /* is_static */ );
1088               error0 = ETHERNET_ARP_ERROR_l3_src_address_learned;
1089             }
1090
1091           /* Only send a reply for requests sent which match a local interface. */
1092           if (!(is_request0 && dst_is_local0))
1093             {
1094               error0 =
1095                 (arp0->opcode ==
1096                  clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) ?
1097                  ETHERNET_ARP_ERROR_replies_received : error0);
1098               goto drop1;
1099             }
1100
1101           /* Send a reply. */
1102         send_reply:
1103           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1104           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1105
1106           /* Send reply back through input interface */
1107           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1108           next0 = ARP_INPUT_NEXT_REPLY_TX;
1109
1110           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
1111
1112           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
1113
1114           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
1115                        hw_if0->hw_address, 6);
1116           clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
1117             if_addr0->data_u32;
1118
1119           /* Hardware must be ethernet-like. */
1120           ASSERT (vec_len (hw_if0->hw_address) == 6);
1121
1122           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
1123           clib_memcpy (eth0->src_address, hw_if0->hw_address, 6);
1124
1125           /* Figure out how much to rewind current data from adjacency. */
1126           /* get the adj from the destination's covering connected */
1127           if (NULL == pa)
1128             {
1129               adj0 =
1130                 adj_get (fib_entry_get_adj_for_source
1131                          (ip4_fib_table_lookup
1132                           (ip4_fib_get (fib_index0),
1133                            &arp0->ip4_over_ethernet[1].ip4, 31),
1134                           FIB_SOURCE_INTERFACE));
1135               if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1136                 {
1137                   error0 = ETHERNET_ARP_ERROR_missing_interface_address;
1138                   goto drop2;
1139                 }
1140               if (is_unnum0)
1141                 {
1142                   if (!arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0))
1143                     goto drop2;
1144                 }
1145               else
1146                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
1147             }
1148           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1149                                            n_left_to_next, pi0, next0);
1150
1151           n_replies_sent += 1;
1152           continue;
1153
1154         drop1:
1155           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
1156               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
1157                arp0->ip4_over_ethernet[1].ip4.as_u32))
1158             {
1159               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
1160               goto drop2;
1161             }
1162           /* See if proxy arp is configured for the address */
1163           if (is_request0)
1164             {
1165               vnet_sw_interface_t *si;
1166               u32 this_addr = clib_net_to_host_u32
1167                 (arp0->ip4_over_ethernet[1].ip4.as_u32);
1168               u32 fib_index0;
1169
1170               si = vnet_get_sw_interface (vnm, sw_if_index0);
1171
1172               if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
1173                 goto drop2;
1174
1175               fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
1176                                     sw_if_index0);
1177
1178               vec_foreach (pa, am->proxy_arps)
1179               {
1180                 u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
1181                 u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
1182
1183                 /* an ARP request hit in the proxy-arp table? */
1184                 if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
1185                     (fib_index0 == pa->fib_index))
1186                   {
1187                     eth0 = ethernet_buffer_get_header (p0);
1188                     proxy_src.as_u32 =
1189                       arp0->ip4_over_ethernet[1].ip4.data_u32;
1190
1191                     /*
1192                      * Rewind buffer, direct code above not to
1193                      * think too hard about it.
1194                      */
1195                     if_addr0 = &proxy_src;
1196                     is_unnum0 = 0;
1197                     i32 ethernet_start =
1198                       vnet_buffer (p0)->ethernet.start_of_ethernet_header;
1199                     i32 rewind = p0->current_data - ethernet_start;
1200                     vlib_buffer_advance (p0, -rewind);
1201                     n_proxy_arp_replies_sent++;
1202                     goto send_reply;
1203                   }
1204               }
1205             }
1206
1207         drop2:
1208
1209           next0 = ARP_INPUT_NEXT_DROP;
1210           p0->error = node->errors[error0];
1211
1212           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1213                                            n_left_to_next, pi0, next0);
1214         }
1215
1216       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1217     }
1218
1219   vlib_error_count (vm, node->node_index,
1220                     ETHERNET_ARP_ERROR_replies_sent,
1221                     n_replies_sent - n_proxy_arp_replies_sent);
1222
1223   vlib_error_count (vm, node->node_index,
1224                     ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
1225                     n_proxy_arp_replies_sent);
1226   return frame->n_vectors;
1227 }
1228
1229 static char *ethernet_arp_error_strings[] = {
1230 #define _(sym,string) string,
1231   foreach_ethernet_arp_error
1232 #undef _
1233 };
1234
1235 /* *INDENT-OFF* */
1236 VLIB_REGISTER_NODE (arp_input_node, static) =
1237 {
1238   .function = arp_input,
1239   .name = "arp-input",
1240   .vector_size = sizeof (u32),
1241   .n_errors = ETHERNET_ARP_N_ERROR,
1242   .error_strings = ethernet_arp_error_strings,
1243   .n_next_nodes = ARP_INPUT_N_NEXT,
1244   .next_nodes = {
1245     [ARP_INPUT_NEXT_DROP] = "error-drop",
1246     [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
1247   },
1248   .format_buffer = format_ethernet_arp_header,
1249   .format_trace = format_ethernet_arp_input_trace,
1250 };
1251 /* *INDENT-ON* */
1252
1253 static int
1254 ip4_arp_entry_sort (void *a1, void *a2)
1255 {
1256   ethernet_arp_ip4_entry_t *e1 = a1;
1257   ethernet_arp_ip4_entry_t *e2 = a2;
1258
1259   int cmp;
1260   vnet_main_t *vnm = vnet_get_main ();
1261
1262   cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index);
1263   if (!cmp)
1264     cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address);
1265   return cmp;
1266 }
1267
1268 ethernet_arp_ip4_entry_t *
1269 ip4_neighbor_entries (u32 sw_if_index)
1270 {
1271   ethernet_arp_main_t *am = &ethernet_arp_main;
1272   ethernet_arp_ip4_entry_t *n, *ns = 0;
1273
1274   /* *INDENT-OFF* */
1275   pool_foreach (n, am->ip4_entry_pool, ({
1276     if (sw_if_index != ~0 && n->sw_if_index != sw_if_index)
1277       continue;
1278     vec_add1 (ns, n[0]);
1279   }));
1280   /* *INDENT-ON* */
1281
1282   if (ns)
1283     vec_sort_with_function (ns, ip4_arp_entry_sort);
1284   return ns;
1285 }
1286
1287 static clib_error_t *
1288 show_ip4_arp (vlib_main_t * vm,
1289               unformat_input_t * input, vlib_cli_command_t * cmd)
1290 {
1291   vnet_main_t *vnm = vnet_get_main ();
1292   ethernet_arp_main_t *am = &ethernet_arp_main;
1293   ethernet_arp_ip4_entry_t *e, *es;
1294   ethernet_proxy_arp_t *pa;
1295   clib_error_t *error = 0;
1296   u32 sw_if_index;
1297
1298   /* Filter entries by interface if given. */
1299   sw_if_index = ~0;
1300   (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
1301
1302   es = ip4_neighbor_entries (sw_if_index);
1303   if (es)
1304     {
1305       vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
1306       vec_foreach (e, es)
1307       {
1308         vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e);
1309       }
1310       vec_free (es);
1311     }
1312
1313   if (vec_len (am->proxy_arps))
1314     {
1315       vlib_cli_output (vm, "Proxy arps enabled for:");
1316       vec_foreach (pa, am->proxy_arps)
1317       {
1318         vlib_cli_output (vm, "Fib_index %d   %U - %U ",
1319                          pa->fib_index,
1320                          format_ip4_address, &pa->lo_addr,
1321                          format_ip4_address, &pa->hi_addr);
1322       }
1323     }
1324
1325   return error;
1326 }
1327
1328 /*?
1329  * Display all the IPv4 ARP entries.
1330  *
1331  * @cliexpar
1332  * Example of how to display the IPv4 ARP table:
1333  * @cliexstart{show ip arp}
1334  *    Time      FIB        IP4       Flags      Ethernet              Interface
1335  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
1336  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
1337  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
1338  * Proxy arps enabled for:
1339  * Fib_index 0   6.0.0.1 - 6.0.0.11
1340  * @cliexend
1341  ?*/
1342 /* *INDENT-OFF* */
1343 VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
1344   .path = "show ip arp",
1345   .function = show_ip4_arp,
1346   .short_help = "show ip arp",
1347 };
1348 /* *INDENT-ON* */
1349
1350 typedef struct
1351 {
1352   pg_edit_t l2_type, l3_type;
1353   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
1354   pg_edit_t opcode;
1355   struct
1356   {
1357     pg_edit_t ethernet;
1358     pg_edit_t ip4;
1359   } ip4_over_ethernet[2];
1360 } pg_ethernet_arp_header_t;
1361
1362 static inline void
1363 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
1364 {
1365   /* Initialize fields that are not bit fields in the IP header. */
1366 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
1367   _(l2_type);
1368   _(l3_type);
1369   _(n_l2_address_bytes);
1370   _(n_l3_address_bytes);
1371   _(opcode);
1372   _(ip4_over_ethernet[0].ethernet);
1373   _(ip4_over_ethernet[0].ip4);
1374   _(ip4_over_ethernet[1].ethernet);
1375   _(ip4_over_ethernet[1].ip4);
1376 #undef _
1377 }
1378
1379 uword
1380 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
1381 {
1382   pg_stream_t *s = va_arg (*args, pg_stream_t *);
1383   pg_ethernet_arp_header_t *p;
1384   u32 group_index;
1385
1386   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
1387                             &group_index);
1388   pg_ethernet_arp_header_init (p);
1389
1390   /* Defaults. */
1391   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1392   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
1393   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
1394   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
1395
1396   if (!unformat (input, "%U: %U/%U -> %U/%U",
1397                  unformat_pg_edit,
1398                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
1399                  unformat_pg_edit,
1400                  unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
1401                  unformat_pg_edit,
1402                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
1403                  unformat_pg_edit,
1404                  unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
1405                  unformat_pg_edit,
1406                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
1407     {
1408       /* Free up any edits we may have added. */
1409       pg_free_edit_group (s);
1410       return 0;
1411     }
1412   return 1;
1413 }
1414
1415 clib_error_t *
1416 ip4_set_arp_limit (u32 arp_limit)
1417 {
1418   ethernet_arp_main_t *am = &ethernet_arp_main;
1419
1420   am->limit_arp_cache_size = arp_limit;
1421   return 0;
1422 }
1423
1424 /**
1425  * @brief Control Plane hook to remove an ARP entry
1426  */
1427 int
1428 vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
1429                                   u32 sw_if_index, void *a_arg)
1430 {
1431   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1432   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1433
1434   args.sw_if_index = sw_if_index;
1435   args.flags = ETHERNET_ARP_ARGS_REMOVE;
1436   clib_memcpy (&args.a, a, sizeof (*a));
1437
1438   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1439                                (u8 *) & args, sizeof (args));
1440   return 0;
1441 }
1442
1443 /**
1444  * @brief Internally generated event to flush the ARP cache on an
1445  * interface state change event.
1446  * A flush will remove dynamic ARP entries, and for statics remove the MAC
1447  * address from the corresponding adjacencies.
1448  */
1449 static int
1450 vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
1451                                   u32 sw_if_index, void *a_arg)
1452 {
1453   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1454   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1455
1456   args.sw_if_index = sw_if_index;
1457   args.flags = ETHERNET_ARP_ARGS_FLUSH;
1458   clib_memcpy (&args.a, a, sizeof (*a));
1459
1460   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1461                                (u8 *) & args, sizeof (args));
1462   return 0;
1463 }
1464
1465 /**
1466  * @brief Internally generated event to populate the ARP cache on an
1467  * interface state change event.
1468  * For static entries this will re-source the adjacencies.
1469  *
1470  * @param sw_if_index The interface on which the ARP entires are acted
1471  */
1472 static int
1473 vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
1474                                      u32 sw_if_index, void *a_arg)
1475 {
1476   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1477   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1478
1479   args.sw_if_index = sw_if_index;
1480   args.flags = ETHERNET_ARP_ARGS_POPULATE;
1481   clib_memcpy (&args.a, a, sizeof (*a));
1482
1483   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1484                                (u8 *) & args, sizeof (args));
1485   return 0;
1486 }
1487
1488 /*
1489  * arp_add_del_interface_address
1490  *
1491  * callback when an interface address is added or deleted
1492  */
1493 static void
1494 arp_add_del_interface_address (ip4_main_t * im,
1495                                uword opaque,
1496                                u32 sw_if_index,
1497                                ip4_address_t * address,
1498                                u32 address_length,
1499                                u32 if_address_index, u32 is_del)
1500 {
1501   /*
1502    * Flush the ARP cache of all entries covered by the address
1503    * that is being removed.
1504    */
1505   ethernet_arp_main_t *am = &ethernet_arp_main;
1506   ethernet_arp_ip4_entry_t *e;
1507
1508   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
1509     return;
1510
1511   if (is_del)
1512     {
1513       ethernet_arp_interface_t *eai;
1514       u32 i, *to_delete = 0;
1515       hash_pair_t *pair;
1516
1517       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
1518
1519       /* *INDENT-OFF* */
1520       hash_foreach_pair (pair, eai->arp_entries,
1521       ({
1522         e = pool_elt_at_index(am->ip4_entry_pool,
1523                               pair->value[0]);
1524         if (ip4_destination_matches_route (im, &e->ip4_address,
1525                                            address, address_length))
1526           {
1527             vec_add1 (to_delete, e - am->ip4_entry_pool);
1528           }
1529       }));
1530       /* *INDENT-ON* */
1531
1532       for (i = 0; i < vec_len (to_delete); i++)
1533         {
1534           ethernet_arp_ip4_over_ethernet_address_t delme;
1535           e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1536
1537           clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1538           delme.ip4.as_u32 = e->ip4_address.as_u32;
1539
1540           vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
1541                                             e->sw_if_index, &delme);
1542         }
1543
1544       vec_free (to_delete);
1545     }
1546 }
1547
1548 static clib_error_t *
1549 ethernet_arp_init (vlib_main_t * vm)
1550 {
1551   ethernet_arp_main_t *am = &ethernet_arp_main;
1552   ip4_main_t *im = &ip4_main;
1553   clib_error_t *error;
1554   pg_node_t *pn;
1555
1556   if ((error = vlib_call_init_function (vm, ethernet_init)))
1557     return error;
1558
1559   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
1560
1561   pn = pg_get_node (arp_input_node.index);
1562   pn->unformat_edit = unformat_pg_arp_header;
1563
1564   am->opcode_by_name = hash_create_string (0, sizeof (uword));
1565 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
1566   foreach_ethernet_arp_opcode;
1567 #undef _
1568
1569   /* $$$ configurable */
1570   am->limit_arp_cache_size = 50000;
1571
1572   am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
1573   am->mac_changes_by_address = hash_create (0, sizeof (uword));
1574
1575   /* don't trace ARP error packets */
1576   {
1577     vlib_node_runtime_t *rt =
1578       vlib_node_get_runtime (vm, arp_input_node.index);
1579
1580 #define _(a,b)                                  \
1581     vnet_pcap_drop_trace_filter_add_del         \
1582         (rt->errors[ETHERNET_ARP_ERROR_##a],    \
1583          1 /* is_add */);
1584     foreach_ethernet_arp_error
1585 #undef _
1586   }
1587
1588   ip4_add_del_interface_address_callback_t cb;
1589   cb.function = arp_add_del_interface_address;
1590   cb.function_opaque = 0;
1591   vec_add1 (im->add_del_interface_address_callbacks, cb);
1592
1593   return 0;
1594 }
1595
1596 VLIB_INIT_FUNCTION (ethernet_arp_init);
1597
1598 static void
1599 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
1600 {
1601   ethernet_arp_main_t *am = &ethernet_arp_main;
1602
1603   fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
1604   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
1605   pool_put (am->ip4_entry_pool, e);
1606 }
1607
1608 static inline int
1609 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
1610                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1611                                            * args)
1612 {
1613   ethernet_arp_main_t *am = &ethernet_arp_main;
1614   ethernet_arp_ip4_entry_t *e;
1615   ethernet_arp_interface_t *eai;
1616
1617   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1618
1619   e = arp_entry_find (eai, &args->a.ip4);
1620
1621   if (NULL != e)
1622     {
1623       arp_entry_free (eai, e);
1624
1625       adj_nbr_walk_nh4 (e->sw_if_index,
1626                         &e->ip4_address, arp_mk_incomplete_walk, NULL);
1627     }
1628
1629   return 0;
1630 }
1631
1632 static int
1633 vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
1634                                            vnet_arp_set_ip4_over_ethernet_rpc_args_t
1635                                            * args)
1636 {
1637   ethernet_arp_main_t *am = &ethernet_arp_main;
1638   ethernet_arp_ip4_entry_t *e;
1639   ethernet_arp_interface_t *eai;
1640
1641   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1642
1643   e = arp_entry_find (eai, &args->a.ip4);
1644
1645   if (NULL != e)
1646     {
1647       adj_nbr_walk_nh4 (e->sw_if_index,
1648                         &e->ip4_address, arp_mk_incomplete_walk, e);
1649
1650       /*
1651        * The difference between flush and unset, is that an unset
1652        * means delete for static and dynamic entries. A flush
1653        * means delete only for dynamic. Flushing is what the DP
1654        * does in response to interface events. unset is only done
1655        * by the control plane.
1656        */
1657       if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
1658         {
1659           arp_entry_free (eai, e);
1660         }
1661     }
1662   return (0);
1663 }
1664
1665 static int
1666 vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
1667                                               vnet_arp_set_ip4_over_ethernet_rpc_args_t
1668                                               * args)
1669 {
1670   ethernet_arp_main_t *am = &ethernet_arp_main;
1671   ethernet_arp_ip4_entry_t *e;
1672   ethernet_arp_interface_t *eai;
1673
1674   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
1675
1676   e = arp_entry_find (eai, &args->a.ip4);
1677
1678   if (NULL != e)
1679     {
1680       adj_nbr_walk_nh4 (e->sw_if_index,
1681                         &e->ip4_address, arp_mk_complete_walk, e);
1682     }
1683   return (0);
1684 }
1685
1686 static void
1687 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
1688                                     * a)
1689 {
1690   vnet_main_t *vm = vnet_get_main ();
1691   ASSERT (os_get_cpu_number () == 0);
1692
1693   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
1694     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
1695   else if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
1696     vnet_arp_flush_ip4_over_ethernet_internal (vm, a);
1697   else if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
1698     vnet_arp_populate_ip4_over_ethernet_internal (vm, a);
1699   else
1700     vnet_arp_set_ip4_over_ethernet_internal (vm, a);
1701 }
1702
1703 /**
1704  * @brief Invoked when the interface's admin state changes
1705  */
1706 static clib_error_t *
1707 ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
1708                                    u32 sw_if_index, u32 flags)
1709 {
1710   ethernet_arp_main_t *am = &ethernet_arp_main;
1711   ethernet_arp_ip4_entry_t *e;
1712   u32 i, *to_delete = 0;
1713
1714   /* *INDENT-OFF* */
1715   pool_foreach (e, am->ip4_entry_pool,
1716   ({
1717     if (e->sw_if_index == sw_if_index)
1718       vec_add1 (to_delete,
1719                 e - am->ip4_entry_pool);
1720   }));
1721   /* *INDENT-ON* */
1722
1723   for (i = 0; i < vec_len (to_delete); i++)
1724     {
1725       ethernet_arp_ip4_over_ethernet_address_t delme;
1726       e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
1727
1728       clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
1729       delme.ip4.as_u32 = e->ip4_address.as_u32;
1730
1731       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1732         {
1733           vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1734         }
1735       else
1736         {
1737           vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
1738         }
1739
1740     }
1741   vec_free (to_delete);
1742
1743   return 0;
1744 }
1745
1746 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
1747
1748 static void
1749 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
1750 {
1751   u8 old;
1752   int i;
1753
1754   for (i = 3; i >= 0; i--)
1755     {
1756       old = a->ip4.as_u8[i];
1757       a->ip4.as_u8[i] += 1;
1758       if (old < a->ip4.as_u8[i])
1759         break;
1760     }
1761
1762   for (i = 5; i >= 0; i--)
1763     {
1764       old = a->ethernet[i];
1765       a->ethernet[i] += 1;
1766       if (old < a->ethernet[i])
1767         break;
1768     }
1769 }
1770
1771 int
1772 vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
1773                                 u32 sw_if_index, void *a_arg, int is_static)
1774 {
1775   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
1776   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
1777
1778   args.sw_if_index = sw_if_index;
1779   args.is_static = is_static;
1780   args.flags = 0;
1781   clib_memcpy (&args.a, a, sizeof (*a));
1782
1783   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
1784                                (u8 *) & args, sizeof (args));
1785   return 0;
1786 }
1787
1788 int
1789 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
1790                         ip4_address_t * hi_addr, u32 fib_index, int is_del)
1791 {
1792   ethernet_arp_main_t *am = &ethernet_arp_main;
1793   ethernet_proxy_arp_t *pa;
1794   u32 found_at_index = ~0;
1795
1796   vec_foreach (pa, am->proxy_arps)
1797   {
1798     if (pa->lo_addr == lo_addr->as_u32
1799         && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index)
1800       {
1801         found_at_index = pa - am->proxy_arps;
1802         break;
1803       }
1804   }
1805
1806   if (found_at_index != ~0)
1807     {
1808       /* Delete, otherwise it's already in the table */
1809       if (is_del)
1810         vec_delete (am->proxy_arps, 1, found_at_index);
1811       return 0;
1812     }
1813   /* delete, no such entry */
1814   if (is_del)
1815     return VNET_API_ERROR_NO_SUCH_ENTRY;
1816
1817   /* add, not in table */
1818   vec_add2 (am->proxy_arps, pa, 1);
1819   pa->lo_addr = lo_addr->as_u32;
1820   pa->hi_addr = hi_addr->as_u32;
1821   pa->fib_index = fib_index;
1822   return 0;
1823 }
1824
1825 /*
1826  * Remove any proxy arp entries asdociated with the
1827  * specificed fib.
1828  */
1829 int
1830 vnet_proxy_arp_fib_reset (u32 fib_id)
1831 {
1832   ip4_main_t *im = &ip4_main;
1833   ethernet_arp_main_t *am = &ethernet_arp_main;
1834   ethernet_proxy_arp_t *pa;
1835   u32 *entries_to_delete = 0;
1836   u32 fib_index;
1837   uword *p;
1838   int i;
1839
1840   p = hash_get (im->fib_index_by_table_id, fib_id);
1841   if (!p)
1842     return VNET_API_ERROR_NO_SUCH_ENTRY;
1843   fib_index = p[0];
1844
1845   vec_foreach (pa, am->proxy_arps)
1846   {
1847     if (pa->fib_index == fib_index)
1848       {
1849         vec_add1 (entries_to_delete, pa - am->proxy_arps);
1850       }
1851   }
1852
1853   for (i = 0; i < vec_len (entries_to_delete); i++)
1854     {
1855       vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
1856     }
1857
1858   vec_free (entries_to_delete);
1859
1860   return 0;
1861 }
1862
1863 static clib_error_t *
1864 ip_arp_add_del_command_fn (vlib_main_t * vm,
1865                            unformat_input_t * input, vlib_cli_command_t * cmd)
1866 {
1867   vnet_main_t *vnm = vnet_get_main ();
1868   u32 sw_if_index;
1869   ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
1870   int addr_valid = 0;
1871   int is_del = 0;
1872   int count = 1;
1873   u32 fib_index = 0;
1874   u32 fib_id;
1875   int is_static = 0;
1876   int is_proxy = 0;
1877
1878   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1879     {
1880       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
1881       if (unformat (input, "%U %U %U",
1882                     unformat_vnet_sw_interface, vnm, &sw_if_index,
1883                     unformat_ip4_address, &addr.ip4,
1884                     unformat_ethernet_address, &addr.ethernet))
1885         addr_valid = 1;
1886
1887       else if (unformat (input, "delete") || unformat (input, "del"))
1888         is_del = 1;
1889
1890       else if (unformat (input, "static"))
1891         is_static = 1;
1892
1893       else if (unformat (input, "count %d", &count))
1894         ;
1895
1896       else if (unformat (input, "fib-id %d", &fib_id))
1897         {
1898           ip4_main_t *im = &ip4_main;
1899           uword *p = hash_get (im->fib_index_by_table_id, fib_id);
1900           if (!p)
1901             return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
1902           fib_index = p[0];
1903         }
1904
1905       else if (unformat (input, "proxy %U - %U",
1906                          unformat_ip4_address, &lo_addr.ip4,
1907                          unformat_ip4_address, &hi_addr.ip4))
1908         is_proxy = 1;
1909       else
1910         break;
1911     }
1912
1913   if (is_proxy)
1914     {
1915       (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
1916                                      fib_index, is_del);
1917       return 0;
1918     }
1919
1920   if (addr_valid)
1921     {
1922       int i;
1923
1924       for (i = 0; i < count; i++)
1925         {
1926           if (is_del == 0)
1927             {
1928               uword event_type, *event_data = 0;
1929
1930               /* Park the debug CLI until the arp entry is installed */
1931               vnet_register_ip4_arp_resolution_event
1932                 (vnm, &addr.ip4, vlib_current_process (vm),
1933                  1 /* type */ , 0 /* data */ );
1934
1935               vnet_arp_set_ip4_over_ethernet
1936                 (vnm, sw_if_index, &addr, is_static);
1937
1938               vlib_process_wait_for_event (vm);
1939               event_type = vlib_process_get_events (vm, &event_data);
1940               vec_reset_length (event_data);
1941               if (event_type != 1)
1942                 clib_warning ("event type %d unexpected", event_type);
1943             }
1944           else
1945             vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
1946
1947           increment_ip4_and_mac_address (&addr);
1948         }
1949     }
1950   else
1951     {
1952       return clib_error_return (0, "unknown input `%U'",
1953                                 format_unformat_error, input);
1954     }
1955
1956   return 0;
1957 }
1958
1959 /* *INDENT-OFF* */
1960 /*?
1961  * Add or delete IPv4 ARP cache entries.
1962  *
1963  * @note 'set ip arp' options (e.g. delete, static, 'fib-id <id>',
1964  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
1965  * any order and combination.
1966  *
1967  * @cliexpar
1968  * @parblock
1969  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
1970  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
1971  * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1972  * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
1973  *
1974  * To add or delete an IPv4 ARP cache entry to or from a specific fib
1975  * table:
1976  * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1977  * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1978  *
1979  * Add or delete IPv4 static ARP cache entries as follows:
1980  * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1981  * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1982  *
1983  * For testing / debugging purposes, the 'set ip arp' command can add or
1984  * delete multiple entries. Supply the 'count N' parameter:
1985  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
1986  * @endparblock
1987  ?*/
1988 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
1989   .path = "set ip arp",
1990   .short_help =
1991   "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
1992   .function = ip_arp_add_del_command_fn,
1993 };
1994 /* *INDENT-ON* */
1995
1996 static clib_error_t *
1997 set_int_proxy_arp_command_fn (vlib_main_t * vm,
1998                               unformat_input_t *
1999                               input, vlib_cli_command_t * cmd)
2000 {
2001   vnet_main_t *vnm = vnet_get_main ();
2002   u32 sw_if_index;
2003   vnet_sw_interface_t *si;
2004   int enable = 0;
2005   int intfc_set = 0;
2006
2007   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2008     {
2009       if (unformat (input, "%U", unformat_vnet_sw_interface,
2010                     vnm, &sw_if_index))
2011         intfc_set = 1;
2012       else if (unformat (input, "enable") || unformat (input, "on"))
2013         enable = 1;
2014       else if (unformat (input, "disable") || unformat (input, "off"))
2015         enable = 0;
2016       else
2017         break;
2018     }
2019
2020   if (intfc_set == 0)
2021     return clib_error_return (0, "unknown input '%U'",
2022                               format_unformat_error, input);
2023
2024   si = vnet_get_sw_interface (vnm, sw_if_index);
2025   ASSERT (si);
2026   if (enable)
2027     si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2028   else
2029     si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
2030
2031   return 0;
2032 }
2033
2034 /* *INDENT-OFF* */
2035 /*?
2036  * Enable proxy-arp on an interface. The vpp stack will answer ARP
2037  * requests for the indicated address range. Multiple proxy-arp
2038  * ranges may be provisioned.
2039  *
2040  * @note Proxy ARP as a technology is infamous for blackholing traffic.
2041  * Also, the underlying implementation has not been performance-tuned.
2042  * Avoid creating an unnecessarily large set of ranges.
2043  *
2044  * @cliexpar
2045  * To enable proxy arp on a range of addresses, use:
2046  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
2047  * Append 'del' to delete a range of proxy ARP addresses:
2048  * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
2049  * You must then specifically enable proxy arp on individual interfaces:
2050  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
2051  * To disable proxy arp on an individual interface:
2052  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
2053  ?*/
2054 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
2055   .path = "set interface proxy-arp",
2056   .short_help =
2057   "set interface proxy-arp <intfc> [enable|disable]",
2058   .function = set_int_proxy_arp_command_fn,
2059 };
2060 /* *INDENT-ON* */
2061
2062
/*
 * ARP/ND termination in an L2 bridge domain, based on the per-BD
 * IP4/IP6-to-MAC hash tables mac_by_ip4 and mac_by_ip6.
 */
/* Next-node slots used by the arp-term-l2bd graph node. */
typedef enum
{
  ARP_TERM_NEXT_L2_OUTPUT = 0,  /* reply goes back out through l2-output */
  ARP_TERM_NEXT_DROP,           /* packet is dropped with an error */
  ARP_TERM_N_NEXT,              /* number of slots; keep last */
} arp_term_next_t;
2073
2074 u32 arp_term_next_node_index[32];
2075
2076 static uword
2077 arp_term_l2bd (vlib_main_t * vm,
2078                vlib_node_runtime_t * node, vlib_frame_t * frame)
2079 {
2080   l2input_main_t *l2im = &l2input_main;
2081   u32 n_left_from, next_index, *from, *to_next;
2082   u32 n_replies_sent = 0;
2083   u16 last_bd_index = ~0;
2084   l2_bridge_domain_t *last_bd_config = 0;
2085   l2_input_config_t *cfg0;
2086
2087   from = vlib_frame_vector_args (frame);
2088   n_left_from = frame->n_vectors;
2089   next_index = node->cached_next_index;
2090
2091   while (n_left_from > 0)
2092     {
2093       u32 n_left_to_next;
2094
2095       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2096
2097       while (n_left_from > 0 && n_left_to_next > 0)
2098         {
2099           vlib_buffer_t *p0;
2100           ethernet_header_t *eth0;
2101           ethernet_arp_header_t *arp0;
2102           ip6_header_t *iph0;
2103           u8 *l3h0;
2104           u32 pi0, error0, next0, sw_if_index0;
2105           u16 ethertype0;
2106           u16 bd_index0;
2107           u32 ip0;
2108           u8 *macp0;
2109
2110           pi0 = from[0];
2111           to_next[0] = pi0;
2112           from += 1;
2113           to_next += 1;
2114           n_left_from -= 1;
2115           n_left_to_next -= 1;
2116
2117           p0 = vlib_get_buffer (vm, pi0);
2118           eth0 = vlib_buffer_get_current (p0);
2119           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
2120           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
2121           arp0 = (ethernet_arp_header_t *) l3h0;
2122
2123           if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
2124                              (arp0->opcode !=
2125                               clib_host_to_net_u16
2126                               (ETHERNET_ARP_OPCODE_request))))
2127             goto check_ip6_nd;
2128
2129           /* Must be ARP request packet here */
2130           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
2131                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
2132             {
2133               u8 *t0 = vlib_add_trace (vm, node, p0,
2134                                        sizeof (ethernet_arp_input_trace_t));
2135               clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
2136             }
2137
2138           error0 = ETHERNET_ARP_ERROR_replies_sent;
2139           error0 =
2140             (arp0->l2_type !=
2141              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
2142              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
2143           error0 =
2144             (arp0->l3_type !=
2145              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
2146              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
2147
2148           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2149
2150           if (error0)
2151             goto drop;
2152
2153           /* Trash ARP packets whose ARP-level source addresses do not
2154              match their L2-frame-level source addresses  */
2155           if (PREDICT_FALSE
2156               (memcmp
2157                (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
2158                 sizeof (eth0->src_address))))
2159             {
2160               error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
2161               goto drop;
2162             }
2163
2164           /* Check if anyone want ARP request events for L2 BDs */
2165           {
2166             pending_resolution_t *mc;
2167             ethernet_arp_main_t *am = &ethernet_arp_main;
2168             uword *p = hash_get (am->mac_changes_by_address, 0);
2169             if (p && (vnet_buffer (p0)->l2.shg == 0))
2170               {                 // Only SHG 0 interface which is more likely local
2171                 u32 next_index = p[0];
2172                 while (next_index != (u32) ~ 0)
2173                   {
2174                     int (*fp) (u32, u8 *, u32, u32);
2175                     int rv = 1;
2176                     mc = pool_elt_at_index (am->mac_changes, next_index);
2177                     fp = mc->data_callback;
2178                     /* Call the callback, return 1 to suppress dup events */
2179                     if (fp)
2180                       rv = (*fp) (mc->data,
2181                                   arp0->ip4_over_ethernet[0].ethernet,
2182                                   sw_if_index0,
2183                                   arp0->ip4_over_ethernet[0].ip4.as_u32);
2184                     /* Signal the resolver process */
2185                     if (rv == 0)
2186                       vlib_process_signal_event (vm, mc->node_index,
2187                                                  mc->type_opaque, mc->data);
2188                     next_index = mc->next_index;
2189                   }
2190               }
2191           }
2192
2193           /* lookup BD mac_by_ip4 hash table for MAC entry */
2194           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
2195           bd_index0 = vnet_buffer (p0)->l2.bd_index;
2196           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
2197                              || (last_bd_index == (u16) ~ 0)))
2198             {
2199               last_bd_index = bd_index0;
2200               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
2201             }
2202           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
2203
2204           if (PREDICT_FALSE (!macp0))
2205             goto next_l2_feature;       /* MAC not found */
2206
2207           /* MAC found, send ARP reply -
2208              Convert ARP request packet to ARP reply */
2209           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
2210           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
2211           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
2212           clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
2213           clib_memcpy (eth0->dst_address, eth0->src_address, 6);
2214           clib_memcpy (eth0->src_address, macp0, 6);
2215           n_replies_sent += 1;
2216
2217         output_response:
2218           /* For BVI, need to use l2-fwd node to send ARP reply as
2219              l2-output node cannot output packet to BVI properly */
2220           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
2221           if (PREDICT_FALSE (cfg0->bvi))
2222             {
2223               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
2224               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
2225               goto next_l2_feature;
2226             }
2227
2228           /* Send ARP/ND reply back out input interface through l2-output */
2229           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2230           next0 = ARP_TERM_NEXT_L2_OUTPUT;
2231           /* Note that output to VXLAN tunnel will fail due to SHG which
2232              is probably desireable since ARP termination is not intended
2233              for ARP requests from other hosts. If output to VXLAN tunnel is
2234              required, however, can just clear the SHG in packet as follows:
2235              vnet_buffer(p0)->l2.shg = 0;         */
2236           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2237                                            to_next, n_left_to_next, pi0,
2238                                            next0);
2239           continue;
2240
2241         check_ip6_nd:
2242           /* IP6 ND event notification or solicitation handling to generate
2243              local response instead of flooding */
2244           iph0 = (ip6_header_t *) l3h0;
2245           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
2246                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
2247                              !ip6_address_is_unspecified
2248                              (&iph0->src_address)))
2249             {
2250               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
2251               if (vnet_ip6_nd_term
2252                   (vm, node, p0, eth0, iph0, sw_if_index0,
2253                    vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
2254                 goto output_response;
2255             }
2256
2257         next_l2_feature:
2258           {
2259             u32 feature_bitmap0 =
2260               vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
2261             vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
2262             next0 =
2263               feat_bitmap_get_next_node_index (arp_term_next_node_index,
2264                                                feature_bitmap0);
2265             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2266                                              to_next, n_left_to_next,
2267                                              pi0, next0);
2268             continue;
2269           }
2270
2271         drop:
2272           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
2273               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
2274                arp0->ip4_over_ethernet[1].ip4.as_u32))
2275             {
2276               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
2277             }
2278           next0 = ARP_TERM_NEXT_DROP;
2279           p0->error = node->errors[error0];
2280
2281           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2282                                            to_next, n_left_to_next, pi0,
2283                                            next0);
2284         }
2285
2286       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2287     }
2288
2289   vlib_error_count (vm, node->node_index,
2290                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
2291   return frame->n_vectors;
2292 }
2293
2294 /* *INDENT-OFF* */
2295 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
2296   .function = arp_term_l2bd,
2297   .name = "arp-term-l2bd",
2298   .vector_size = sizeof (u32),
2299   .n_errors = ETHERNET_ARP_N_ERROR,
2300   .error_strings = ethernet_arp_error_strings,
2301   .n_next_nodes = ARP_TERM_N_NEXT,
2302   .next_nodes = {
2303     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
2304     [ARP_TERM_NEXT_DROP] = "error-drop",
2305   },
2306   .format_buffer = format_ethernet_arp_header,
2307   .format_trace = format_arp_term_input_trace,
2308 };
2309 /* *INDENT-ON* */
2310
2311 clib_error_t *
2312 arp_term_init (vlib_main_t * vm)
2313 {
2314   // Initialize the feature next-node indexes
2315   feat_bitmap_init_next_nodes (vm,
2316                                arp_term_l2bd_node.index,
2317                                L2INPUT_N_FEAT,
2318                                l2input_get_feat_names (),
2319                                arp_term_next_node_index);
2320   return 0;
2321 }
2322
2323 VLIB_INIT_FUNCTION (arp_term_init);
2324
2325 void
2326 change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
2327 {
2328   if (e->sw_if_index == sw_if_index)
2329     {
2330       adj_nbr_walk_nh4 (e->sw_if_index,
2331                         &e->ip4_address, arp_mk_complete_walk, e);
2332     }
2333 }
2334
2335 void
2336 ethernet_arp_change_mac (u32 sw_if_index)
2337 {
2338   ethernet_arp_main_t *am = &ethernet_arp_main;
2339   ethernet_arp_ip4_entry_t *e;
2340
2341   /* *INDENT-OFF* */
2342   pool_foreach (e, am->ip4_entry_pool,
2343   ({
2344     change_arp_mac (sw_if_index, e);
2345   }));
2346   /* *INDENT-ON* */
2347 }
2348
2349 /*
2350  * fd.io coding-style-patch-verification: ON
2351  *
2352  * Local Variables:
2353  * eval: (c-set-style "gnu")
2354  * End:
2355  */