6319f886b701cda0d8c4cba052f5234c60c68664
[vpp.git] / src / vnet / arp / arp.c
1 /*
2  * ethernet/arp.c: IP v4 ARP node
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vnet/arp/arp.h>
19 #include <vnet/arp/arp_packet.h>
20
21 #include <vnet/fib/ip4_fib.h>
22 #include <vnet/fib/fib_entry_src.h>
23 #include <vnet/adj/adj_nbr.h>
24 #include <vnet/adj/adj_mcast.h>
25 #include <vnet/pg/pg.h>
26
27 #include <vnet/ip-neighbor/ip_neighbor.h>
28 #include <vnet/ip-neighbor/ip4_neighbor.h>
29 #include <vnet/ip-neighbor/ip_neighbor_dp.h>
30
31 #include <vlibmemory/api.h>
32
33 /**
34  * @file
35  * @brief IPv4 ARP.
36  *
37  * This file contains code to manage the IPv4 ARP tables (IP Address
38  * to MAC Address lookup).
39  */
40
41 /**
42  * @brief Per-interface ARP configuration and state
43  */
44 typedef struct ethernet_arp_interface_t_
45 {
46   /**
47    * Is ARP enabled on this interface
48    */
49   u32 enabled;
50 } ethernet_arp_interface_t;
51
52 typedef struct
53 {
54   /* Hash tables mapping name to opcode. */
55   uword *opcode_by_name;
56
57   /** Per interface state */
58   ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
59
60   /* ARP feature arc index */
61   u8 feature_arc_index;
62 } ethernet_arp_main_t;
63
64 static ethernet_arp_main_t ethernet_arp_main;
65
66 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
67
68 static uword
69 unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
70                                               va_list * args)
71 {
72   int *result = va_arg (*args, int *);
73   ethernet_arp_main_t *am = &ethernet_arp_main;
74   int x, i;
75
76   /* Numeric opcode. */
77   if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
78     {
79       if (x >= (1 << 16))
80         return 0;
81       *result = x;
82       return 1;
83     }
84
85   /* Named type. */
86   if (unformat_user (input, unformat_vlib_number_by_name,
87                      am->opcode_by_name, &i))
88     {
89       *result = i;
90       return 1;
91     }
92
93   return 0;
94 }
95
96 static uword
97 unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
98                                              va_list * args)
99 {
100   int *result = va_arg (*args, int *);
101   if (!unformat_user
102       (input, unformat_ethernet_arp_opcode_host_byte_order, result))
103     return 0;
104
105   *result = clib_host_to_net_u16 ((u16) * result);
106   return 1;
107 }
108
109 typedef struct
110 {
111   u8 packet_data[64];
112 } ethernet_arp_input_trace_t;
113
114 static u8 *
115 format_ethernet_arp_input_trace (u8 * s, va_list * va)
116 {
117   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
118   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
119   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
120
121   s = format (s, "%U",
122               format_ethernet_arp_header,
123               t->packet_data, sizeof (t->packet_data));
124
125   return s;
126 }
127
128 static int
129 arp_is_enabled (ethernet_arp_main_t * am, u32 sw_if_index)
130 {
131   if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index)
132     return 0;
133
134   return (am->ethernet_arp_by_sw_if_index[sw_if_index].enabled);
135 }
136
137 static void
138 arp_enable (ethernet_arp_main_t * am, u32 sw_if_index)
139 {
140   if (arp_is_enabled (am, sw_if_index))
141     return;
142
143   vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
144
145   am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 1;
146
147   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 1, NULL, 0);
148   vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 0, NULL,
149                                0);
150 }
151
152 static void
153 arp_disable (ethernet_arp_main_t * am, u32 sw_if_index)
154 {
155   if (!arp_is_enabled (am, sw_if_index))
156     return;
157
158   vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 1, NULL,
159                                0);
160   vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 0, NULL, 0);
161
162   am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 0;
163 }
164
165 static int
166 arp_unnumbered (vlib_buffer_t * p0,
167                 u32 input_sw_if_index, u32 conn_sw_if_index)
168 {
169   vnet_main_t *vnm = vnet_get_main ();
170   vnet_interface_main_t *vim = &vnm->interface_main;
171   vnet_sw_interface_t *si;
172
173   /* verify that the input interface is unnumbered to the connected.
174    * the connected interface is the interface on which the subnet is
175    * configured */
176   si = &vim->sw_interfaces[input_sw_if_index];
177
178   if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED &&
179         (si->unnumbered_sw_if_index == conn_sw_if_index)))
180     {
181       /* the input interface is not unnumbered to the interface on which
182        * the sub-net is configured that covers the ARP request.
183        * So this is not the case for unnumbered.. */
184       return 0;
185     }
186
187   return !0;
188 }
189
190 always_inline u32
191 arp_learn (u32 sw_if_index,
192            const ethernet_arp_ip4_over_ethernet_address_t * addr)
193 {
194   /* *INDENT-OFF* */
195   ip_neighbor_learn_t l = {
196     .ip = {
197       .ip.ip4 = addr->ip4,
198       .version = AF_IP4,
199     },
200     .mac = addr->mac,
201     .sw_if_index = sw_if_index,
202   };
203   /* *INDENT-ON* */
204
205   ip_neighbor_learn_dp (&l);
206
207   return (ARP_ERROR_L3_SRC_ADDRESS_LEARNED);
208 }
209
/**
 * @brief Next-node indices of the "arp-input" node.
 */
typedef enum arp_input_next_t_
{
  /** Packet failed header validation: "error-drop". */
  ARP_INPUT_NEXT_DROP,
  /** Default next used when starting the feature arc: "arp-disabled". */
  ARP_INPUT_NEXT_DISABLED,
  /** Number of next nodes. */
  ARP_INPUT_N_NEXT,
} arp_input_next_t;
216
217 static uword
218 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
219 {
220   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
221   ethernet_arp_main_t *am = &ethernet_arp_main;
222
223   from = vlib_frame_vector_args (frame);
224   n_left_from = frame->n_vectors;
225   next_index = node->cached_next_index;
226
227   if (node->flags & VLIB_NODE_FLAG_TRACE)
228     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
229                                    /* stride */ 1,
230                                    sizeof (ethernet_arp_input_trace_t));
231
232   while (n_left_from > 0)
233     {
234       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
235
236       while (n_left_from > 0 && n_left_to_next > 0)
237         {
238           const ethernet_arp_header_t *arp0;
239           arp_input_next_t next0;
240           vlib_buffer_t *p0;
241           u32 pi0, error0;
242
243           pi0 = to_next[0] = from[0];
244           from += 1;
245           to_next += 1;
246           n_left_from -= 1;
247           n_left_to_next -= 1;
248
249           p0 = vlib_get_buffer (vm, pi0);
250           arp0 = vlib_buffer_get_current (p0);
251
252           error0 = ARP_ERROR_REPLIES_SENT;
253           next0 = ARP_INPUT_NEXT_DROP;
254
255           error0 = (arp0->l2_type != clib_net_to_host_u16 (
256                                        ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
257                             ARP_ERROR_L2_TYPE_NOT_ETHERNET :
258                             error0);
259           error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
260                             ARP_ERROR_L3_TYPE_NOT_IP4 :
261                             error0);
262           error0 = (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
263                             ARP_ERROR_L3_DST_ADDRESS_UNSET :
264                             error0);
265
266           if (ARP_ERROR_REPLIES_SENT == error0)
267             {
268               next0 = ARP_INPUT_NEXT_DISABLED;
269               vnet_feature_arc_start (am->feature_arc_index,
270                                       vnet_buffer (p0)->sw_if_index[VLIB_RX],
271                                       &next0, p0);
272             }
273           else
274             p0->error = node->errors[error0];
275
276           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
277                                            n_left_to_next, pi0, next0);
278         }
279
280       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
281     }
282
283   return frame->n_vectors;
284 }
285
/**
 * @brief Next-node indices of the "arp-disabled" node.
 */
typedef enum arp_disabled_next_t_
{
  /** The only disposition: drop the packet. */
  ARP_DISABLED_NEXT_DROP,
  /** Number of next nodes. */
  ARP_DISABLED_N_NEXT,
} arp_disabled_next_t;
291
292 static uword
293 arp_disabled (vlib_main_t * vm,
294               vlib_node_runtime_t * node, vlib_frame_t * frame)
295 {
296   u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
297
298   from = vlib_frame_vector_args (frame);
299   n_left_from = frame->n_vectors;
300   next_index = node->cached_next_index;
301
302   if (node->flags & VLIB_NODE_FLAG_TRACE)
303     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
304                                    /* stride */ 1,
305                                    sizeof (ethernet_arp_input_trace_t));
306
307   while (n_left_from > 0)
308     {
309       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
310
311       while (n_left_from > 0 && n_left_to_next > 0)
312         {
313           arp_disabled_next_t next0 = ARP_DISABLED_NEXT_DROP;
314           vlib_buffer_t *p0;
315           u32 pi0, error0;
316
317           next0 = ARP_DISABLED_NEXT_DROP;
318           error0 = ARP_ERROR_DISABLED;
319
320           pi0 = to_next[0] = from[0];
321           from += 1;
322           to_next += 1;
323           n_left_from -= 1;
324           n_left_to_next -= 1;
325
326           p0 = vlib_get_buffer (vm, pi0);
327           p0->error = node->errors[error0];
328
329           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
330                                            n_left_to_next, pi0, next0);
331         }
332
333       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
334     }
335
336   return frame->n_vectors;
337 }
338
/**
 * @brief Classification of the FIB entry matching an ARP target address,
 *        as returned by arp_dst_fib_check().
 */
enum arp_dst_fib_type
{
  /** No source of the entry is an adj-fib or connected. */
  ARP_DST_FIB_NONE,
  /** The entry is sourced by FIB_SOURCE_ADJ. */
  ARP_DST_FIB_ADJ,
  /** One of the entry's sources carries the CONNECTED flag. */
  ARP_DST_FIB_CONN
};
345
346 /*
347  * we're looking for FIB sources that indicate the destination
348  * is attached. There may be interposed DPO prior to the one
349  * we are looking for
350  */
351 static enum arp_dst_fib_type
352 arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
353 {
354   const fib_entry_t *entry = fib_entry_get (fei);
355   const fib_entry_src_t *entry_src;
356   fib_source_t src;
357   /* *INDENT-OFF* */
358   FOR_EACH_SRC_ADDED(entry, entry_src, src,
359   ({
360     *flags = fib_entry_get_flags_for_source (fei, src);
361     if (fib_entry_is_sourced (fei, FIB_SOURCE_ADJ))
362         return ARP_DST_FIB_ADJ;
363       else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
364         return ARP_DST_FIB_CONN;
365   }))
366   /* *INDENT-ON* */
367
368   return ARP_DST_FIB_NONE;
369 }
370
371 static uword
372 arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
373 {
374   vnet_main_t *vnm = vnet_get_main ();
375   u32 n_left_from, next_index, *from, *to_next;
376   u32 n_replies_sent = 0;
377
378   from = vlib_frame_vector_args (frame);
379   n_left_from = frame->n_vectors;
380   next_index = node->cached_next_index;
381
382   if (node->flags & VLIB_NODE_FLAG_TRACE)
383     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
384                                    /* stride */ 1,
385                                    sizeof (ethernet_arp_input_trace_t));
386
387   while (n_left_from > 0)
388     {
389       u32 n_left_to_next;
390
391       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
392
393       while (n_left_from > 0 && n_left_to_next > 0)
394         {
395           vlib_buffer_t *p0;
396           ethernet_arp_header_t *arp0;
397           ethernet_header_t *eth_rx;
398           const ip4_address_t *if_addr0;
399           u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
400           u8 dst_is_local0, is_vrrp_reply0;
401           fib_node_index_t dst_fei, src_fei;
402           const fib_prefix_t *pfx0;
403           fib_entry_flag_t src_flags, dst_flags;
404
405           pi0 = from[0];
406           to_next[0] = pi0;
407           from += 1;
408           to_next += 1;
409           n_left_from -= 1;
410           n_left_to_next -= 1;
411
412           p0 = vlib_get_buffer (vm, pi0);
413           arp0 = vlib_buffer_get_current (p0);
414           /* Fill in ethernet header. */
415           eth_rx = ethernet_buffer_get_header (p0);
416
417           next0 = ARP_REPLY_NEXT_DROP;
418           error0 = ARP_ERROR_REPLIES_SENT;
419           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
420
421           /* Check that IP address is local and matches incoming interface. */
422           fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
423           if (~0 == fib_index0)
424             {
425               error0 = ARP_ERROR_INTERFACE_NO_TABLE;
426               goto drop;
427
428             }
429
430           {
431             /*
432              * we're looking for FIB entries that indicate the source
433              * is attached. There may be more specific non-attached
434              * routes that match the source, but these do not influence
435              * whether we respond to an ARP request, i.e. they do not
436              * influence whether we are the correct way for the sender
437              * to reach us, they only affect how we reach the sender.
438              */
439             fib_entry_t *src_fib_entry;
440             const fib_prefix_t *pfx;
441             fib_entry_src_t *src;
442             fib_source_t source;
443             int attached;
444             int mask;
445
446             mask = 32;
447             attached = 0;
448
449             do
450               {
451                 src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
452                                                 &arp0->
453                                                 ip4_over_ethernet[0].ip4,
454                                                 mask);
455                 src_fib_entry = fib_entry_get (src_fei);
456
457                 /*
458                  * It's possible that the source that provides the
459                  * flags we need, or the flags we must not have,
460                  * is not the best source, so check then all.
461                  */
462                 /* *INDENT-OFF* */
463                 FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
464                 ({
465                   src_flags = fib_entry_get_flags_for_source (src_fei, source);
466
467                   /* Reject requests/replies with our local interface
468                      address. */
469                   if (FIB_ENTRY_FLAG_LOCAL & src_flags)
470                     {
471                       error0 = ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL;
472                       /*
473                        * When VPP has an interface whose address is also
474                        * applied to a TAP interface on the host, then VPP's
475                        * TAP interface will be unnumbered  to the 'real'
476                        * interface and do proxy ARP from the host.
477                        * The curious aspect of this setup is that ARP requests
478                        * from the host will come from the VPP's own address.
479                        * So don't drop immediately here, instead go see if this
480                        * is a proxy ARP case.
481                        */
482                       goto next_feature;
483                     }
484                   /* A Source must also be local to subnet of matching
485                    * interface address. */
486                   if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
487                       (FIB_ENTRY_FLAG_CONNECTED & src_flags))
488                     {
489                       attached = 1;
490                       break;
491                     }
492                   /*
493                    * else
494                    *  The packet was sent from an address that is not
495                    *  connected nor attached i.e. it is not from an
496                    *  address that is covered by a link's sub-net,
497                    *  nor is it a already learned host resp.
498                    */
499                 }));
500                 /* *INDENT-ON* */
501
502                 /*
503                  * shorter mask lookup for the next iteration.
504                  */
505                 pfx = fib_entry_get_prefix (src_fei);
506                 mask = pfx->fp_len - 1;
507
508                 /*
509                  * continue until we hit the default route or we find
510                  * the attached we are looking for. The most likely
511                  * outcome is we find the attached with the first source
512                  * on the first lookup.
513                  */
514               }
515             while (!attached &&
516                    !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
517
518             if (!attached)
519               {
520                 /*
521                  * the matching route is a not attached, i.e. it was
522                  * added as a result of routing, rather than interface/ARP
523                  * configuration. If the matching route is not a host route
524                  * (i.e. a /32)
525                  */
526                 error0 = ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL;
527                 goto drop;
528               }
529           }
530
531           dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
532                                           &arp0->ip4_over_ethernet[1].ip4,
533                                           32);
534           conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
535
536           switch (arp_dst_fib_check (dst_fei, &dst_flags))
537             {
538             case ARP_DST_FIB_ADJ:
539               /*
540                * We matched an adj-fib on ths source subnet (a /32 previously
541                * added as a result of ARP). If this request is a gratuitous
542                * ARP, then learn from it.
543                * The check for matching an adj-fib, is to prevent hosts
544                * from spamming us with gratuitous ARPS that might otherwise
545                * blow our ARP cache
546                */
547               if (conn_sw_if_index0 != sw_if_index0)
548                 error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
549               else if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
550                        arp0->ip4_over_ethernet[1].ip4.as_u32)
551                 {
552                   vlib_increment_simple_counter (
553                     &ip_neighbor_counters[AF_IP4]
554                        .ipnc[VLIB_RX][IP_NEIGHBOR_CTR_GRAT],
555                     vm->thread_index, sw_if_index0, 1);
556                   error0 =
557                     arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[0]);
558                 }
559               goto drop;
560             case ARP_DST_FIB_CONN:
561               /* destination is connected, continue to process */
562               break;
563             case ARP_DST_FIB_NONE:
564               /* destination is not connected, stop here */
565               error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
566               goto next_feature;
567             }
568
569           dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
570           pfx0 = fib_entry_get_prefix (dst_fei);
571           if_addr0 = &pfx0->fp_addr.ip4;
572
573           is_vrrp_reply0 =
574             ((arp0->opcode ==
575               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
576              &&
577              (!memcmp
578               (arp0->ip4_over_ethernet[0].mac.bytes, vrrp_prefix,
579                sizeof (vrrp_prefix))));
580
581           /* Trash ARP packets whose ARP-level source addresses do not
582              match their L2-frame-level source addresses, unless it's
583              a reply from a VRRP virtual router */
584           if (!ethernet_mac_address_equal
585               (eth_rx->src_address,
586                arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
587             {
588               error0 = ARP_ERROR_L2_ADDRESS_MISMATCH;
589               goto drop;
590             }
591
592           vlib_increment_simple_counter (
593             &ip_neighbor_counters[AF_IP4]
594                .ipnc[VLIB_RX][arp0->opcode == clib_host_to_net_u16 (
595                                                 ETHERNET_ARP_OPCODE_reply) ?
596                                       IP_NEIGHBOR_CTR_REPLY :
597                                       IP_NEIGHBOR_CTR_REQUEST],
598             vm->thread_index, sw_if_index0, 1);
599
600           /* Learn or update sender's mapping only for replies to addresses
601            * that are local to the subnet */
602           if (arp0->opcode ==
603               clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
604             {
605               if (dst_is_local0)
606                 error0 =
607                   arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[0]);
608               else
609                 /* a reply for a non-local destination could be a GARP.
610                  * GARPs for hosts we know were handled above, so this one
611                  * we drop */
612                 error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
613
614               goto next_feature;
615             }
616           else if (arp0->opcode ==
617                    clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request) &&
618                    (dst_is_local0 == 0))
619             {
620               goto next_feature;
621             }
622
623           /* Honor unnumbered interface, if any */
624           if (sw_if_index0 != conn_sw_if_index0 ||
625               sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
626             {
627               /*
628                * The interface the ARP is sent to or was received on is not the
629                * interface on which the covering prefix is configured.
630                * Maybe this is a case for unnumbered.
631                */
632               if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
633                 {
634                   error0 = ARP_ERROR_UNNUMBERED_MISMATCH;
635                   goto drop;
636                 }
637             }
638           if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
639               arp0->ip4_over_ethernet[1].ip4.as_u32)
640             {
641               error0 = ARP_ERROR_GRATUITOUS_ARP;
642               goto drop;
643             }
644
645           next0 = arp_mk_reply (vnm, p0, sw_if_index0,
646                                 if_addr0, arp0, eth_rx);
647
648           /* We are going to reply to this request, so, in the absence of
649              errors, learn the sender */
650           if (!error0)
651             error0 = arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[1]);
652
653           vlib_increment_simple_counter (
654             &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REPLY],
655             vm->thread_index, sw_if_index0, 1);
656           n_replies_sent += 1;
657           goto enqueue;
658
659         next_feature:
660           vnet_feature_next (&next0, p0);
661           goto enqueue;
662
663         drop:
664           p0->error = node->errors[error0];
665
666         enqueue:
667           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
668                                            n_left_to_next, pi0, next0);
669         }
670
671       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
672     }
673
674   vlib_error_count (vm, node->node_index, ARP_ERROR_REPLIES_SENT,
675                     n_replies_sent);
676
677   return frame->n_vectors;
678 }
679
680
681 /* *INDENT-OFF* */
682
683 VLIB_REGISTER_NODE (arp_input_node, static) =
684 {
685   .function = arp_input,
686   .name = "arp-input",
687   .vector_size = sizeof (u32),
688   .n_errors = ARP_N_ERROR,
689   .error_counters = arp_error_counters,
690   .n_next_nodes = ARP_INPUT_N_NEXT,
691   .next_nodes = {
692     [ARP_INPUT_NEXT_DROP] = "error-drop",
693     [ARP_INPUT_NEXT_DISABLED] = "arp-disabled",
694   },
695   .format_buffer = format_ethernet_arp_header,
696   .format_trace = format_ethernet_arp_input_trace,
697 };
698
699 VLIB_REGISTER_NODE (arp_disabled_node, static) =
700 {
701   .function = arp_disabled,
702   .name = "arp-disabled",
703   .vector_size = sizeof (u32),
704   .n_errors = ARP_N_ERROR,
705   .error_counters = arp_error_counters,
706   .n_next_nodes = ARP_DISABLED_N_NEXT,
707   .next_nodes = {
708     [ARP_INPUT_NEXT_DROP] = "error-drop",
709   },
710   .format_buffer = format_ethernet_arp_header,
711   .format_trace = format_ethernet_arp_input_trace,
712 };
713
714 VLIB_REGISTER_NODE (arp_reply_node, static) =
715 {
716   .function = arp_reply,
717   .name = "arp-reply",
718   .vector_size = sizeof (u32),
719   .n_errors = ARP_N_ERROR,
720   .error_counters = arp_error_counters,
721   .n_next_nodes = ARP_REPLY_N_NEXT,
722   .next_nodes = {
723     [ARP_REPLY_NEXT_DROP] = "error-drop",
724     [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
725   },
726   .format_buffer = format_ethernet_arp_header,
727   .format_trace = format_ethernet_arp_input_trace,
728 };
729
730 /* Built-in ARP rx feature path definition */
731 VNET_FEATURE_ARC_INIT (arp_feat, static) =
732 {
733   .arc_name = "arp",
734   .start_nodes = VNET_FEATURES ("arp-input"),
735   .last_in_arc = "error-drop",
736   .arc_index_ptr = &ethernet_arp_main.feature_arc_index,
737 };
738
739 VNET_FEATURE_INIT (arp_reply_feat_node, static) =
740 {
741   .arc_name = "arp",
742   .node_name = "arp-reply",
743   .runs_before = VNET_FEATURES ("arp-disabled"),
744 };
745
746 VNET_FEATURE_INIT (arp_proxy_feat_node, static) =
747 {
748   .arc_name = "arp",
749   .node_name = "arp-proxy",
750   .runs_after = VNET_FEATURES ("arp-reply"),
751   .runs_before = VNET_FEATURES ("arp-disabled"),
752 };
753
754 VNET_FEATURE_INIT (arp_disabled_feat_node, static) =
755 {
756   .arc_name = "arp",
757   .node_name = "arp-disabled",
758   .runs_before = VNET_FEATURES ("error-drop"),
759 };
760
761 VNET_FEATURE_INIT (arp_drop_feat_node, static) =
762 {
763   .arc_name = "arp",
764   .node_name = "error-drop",
765   .runs_before = 0,     /* last feature */
766 };
767
768 /* *INDENT-ON* */
769
770 typedef struct
771 {
772   pg_edit_t l2_type, l3_type;
773   pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
774   pg_edit_t opcode;
775   struct
776   {
777     pg_edit_t mac;
778     pg_edit_t ip4;
779   } ip4_over_ethernet[2];
780 } pg_ethernet_arp_header_t;
781
782 static inline void
783 pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
784 {
785   /* Initialize fields that are not bit fields in the IP header. */
786 #define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
787   _(l2_type);
788   _(l3_type);
789   _(n_l2_address_bytes);
790   _(n_l3_address_bytes);
791   _(opcode);
792   _(ip4_over_ethernet[0].mac);
793   _(ip4_over_ethernet[0].ip4);
794   _(ip4_over_ethernet[1].mac);
795   _(ip4_over_ethernet[1].ip4);
796 #undef _
797 }
798
799 uword
800 unformat_pg_arp_header (unformat_input_t * input, va_list * args)
801 {
802   pg_stream_t *s = va_arg (*args, pg_stream_t *);
803   pg_ethernet_arp_header_t *p;
804   u32 group_index;
805
806   p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
807                             &group_index);
808   pg_ethernet_arp_header_init (p);
809
810   /* Defaults. */
811   pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
812   pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
813   pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
814   pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
815
816   if (!unformat (input, "%U: %U/%U -> %U/%U",
817                  unformat_pg_edit,
818                  unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
819                  unformat_pg_edit,
820                  unformat_mac_address_t, &p->ip4_over_ethernet[0].mac,
821                  unformat_pg_edit,
822                  unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
823                  unformat_pg_edit,
824                  unformat_mac_address_t, &p->ip4_over_ethernet[1].mac,
825                  unformat_pg_edit,
826                  unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
827     {
828       /* Free up any edits we may have added. */
829       pg_free_edit_group (s);
830       return 0;
831     }
832   return 1;
833 }
834
835 /*
836  * callback when an interface address is added or deleted
837  */
838 static void
839 arp_enable_disable_interface (ip4_main_t * im,
840                               uword opaque, u32 sw_if_index, u32 is_enable)
841 {
842   ethernet_arp_main_t *am = &ethernet_arp_main;
843
844   if (is_enable)
845     arp_enable (am, sw_if_index);
846   else
847     arp_disable (am, sw_if_index);
848 }
849
850 /*
851  * Remove any arp entries associated with the specified interface
852  */
853 static clib_error_t *
854 vnet_arp_add_del_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
855 {
856   ethernet_arp_main_t *am = &ethernet_arp_main;
857   if (is_add)
858     arp_disable (am, sw_if_index);
859   return (NULL);
860 }
861
862 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_add_del_sw_interface);
863
864 const static ip_neighbor_vft_t arp_vft = {
865   .inv_proxy4_add = arp_proxy_add,
866   .inv_proxy4_del = arp_proxy_del,
867   .inv_proxy4_enable = arp_proxy_enable,
868   .inv_proxy4_disable = arp_proxy_disable,
869 };
870
871 static clib_error_t *
872 ethernet_arp_init (vlib_main_t * vm)
873 {
874   ethernet_arp_main_t *am = &ethernet_arp_main;
875   ip4_main_t *im = &ip4_main;
876   pg_node_t *pn;
877
878   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
879
880   pn = pg_get_node (arp_input_node.index);
881   pn->unformat_edit = unformat_pg_arp_header;
882
883   am->opcode_by_name = hash_create_string (0, sizeof (uword));
884 #define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
885   foreach_ethernet_arp_opcode;
886 #undef _
887
888   /* don't trace ARP error packets */
889   {
890     vlib_node_runtime_t *rt =
891       vlib_node_get_runtime (vm, arp_input_node.index);
892
893     vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_REPLIES_SENT],
894                                          1);
895     vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_DISABLED], 1);
896     vnet_pcap_drop_trace_filter_add_del (
897       rt->errors[ARP_ERROR_L2_TYPE_NOT_ETHERNET], 1);
898     vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_L3_TYPE_NOT_IP4],
899                                          1);
900     vnet_pcap_drop_trace_filter_add_del (
901       rt->errors[ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL], 1);
902     vnet_pcap_drop_trace_filter_add_del (
903       rt->errors[ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL], 1);
904     vnet_pcap_drop_trace_filter_add_del (
905       rt->errors[ARP_ERROR_L3_DST_ADDRESS_UNSET], 1);
906     vnet_pcap_drop_trace_filter_add_del (
907       rt->errors[ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL], 1);
908     vnet_pcap_drop_trace_filter_add_del (
909       rt->errors[ARP_ERROR_L3_SRC_ADDRESS_LEARNED], 1);
910     vnet_pcap_drop_trace_filter_add_del (
911       rt->errors[ARP_ERROR_REPLIES_RECEIVED], 1);
912     vnet_pcap_drop_trace_filter_add_del (
913       rt->errors[ARP_ERROR_OPCODE_NOT_REQUEST], 1);
914     vnet_pcap_drop_trace_filter_add_del (
915       rt->errors[ARP_ERROR_PROXY_ARP_REPLIES_SENT], 1);
916     vnet_pcap_drop_trace_filter_add_del (
917       rt->errors[ARP_ERROR_L2_ADDRESS_MISMATCH], 1);
918     vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_GRATUITOUS_ARP],
919                                          1);
920     vnet_pcap_drop_trace_filter_add_del (
921       rt->errors[ARP_ERROR_INTERFACE_NO_TABLE], 1);
922     vnet_pcap_drop_trace_filter_add_del (
923       rt->errors[ARP_ERROR_INTERFACE_NOT_IP_ENABLED], 1);
924     vnet_pcap_drop_trace_filter_add_del (
925       rt->errors[ARP_ERROR_UNNUMBERED_MISMATCH], 1);
926   }
927
928   {
929     ip4_enable_disable_interface_callback_t cb = {
930       .function = arp_enable_disable_interface,
931     };
932     vec_add1 (im->enable_disable_interface_callbacks, cb);
933   }
934
935   ip_neighbor_register (AF_IP4, &arp_vft);
936
937   return 0;
938 }
939
940 /* *INDENT-OFF* */
941 VLIB_INIT_FUNCTION (ethernet_arp_init) =
942 {
943   .runs_after = VLIB_INITS("ethernet_init",
944                            "ip_neighbor_init"),
945 };
946 /* *INDENT-ON* */
947
948 /*
949  * fd.io coding-style-patch-verification: ON
950  *
951  * Local Variables:
952  * eval: (c-set-style "gnu")
953  * End:
954  */