udp: fix csum computation when offload disabled
[vpp.git] / src / vnet / l2 / l2_arp_term.c
1 /*
2  * l2/l2_arp_term.c: IP v4 ARP L2 BD termination
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vlibmemory/api.h>
19
20 #include <vnet/l2/l2_arp_term.h>
21 #include <vnet/l2/l2_input.h>
22 #include <vnet/l2/feat_bitmap.h>
23
24 #include <vnet/ip/ip4_packet.h>
25 #include <vnet/ip/ip6_packet.h>
26 #include <vnet/ip/icmp6.h>
27 #include <vnet/ip/ip6.h>
28 #include <vnet/ip/ip.api_enum.h>
29 #include <vnet/ip/format.h>
30 #include <vnet/ethernet/arp_packet.h>
31
/* Leading five bytes (00:00:5E:00:01) used to recognise a VRRP virtual MAC.
   arp_term_l2bd compares the ARP sender hardware address against this prefix
   before dropping a packet whose sender MAC does not match the frame source
   MAC. */
32 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
33
/* Global L2 ARP termination state.  This file reads/writes its `publish`
   flag and appends to its `publish_events` vector. */
34 l2_arp_term_main_t l2_arp_term_main;
35
36 /*
37  * ARP/ND termination in an L2 bridge domain (BD), based on the per-BD
38  * IP4/IP6-to-MAC hash tables mac_by_ip4 and mac_by_ip6.
39  */
/* Next-node slots of the arp-term-l2bd node; the names they map to are given
   in the node registration's .next_nodes table. */
40 typedef enum
41 {
42   ARP_TERM_NEXT_L2_OUTPUT,	/* registered as "l2-output" */
43   ARP_TERM_NEXT_DROP,	/* registered as "error-drop" */
44   ARP_TERM_N_NEXT,	/* number of next nodes */
45 } arp_term_next_t;
46
/* Next-node index table filled in by feat_bitmap_init_next_nodes() from
   arp_term_init() and passed to vnet_l2_feature_next() when a packet is
   handed on to the next L2 feature. */
47 u32 arp_term_next_node_index[32];
48
/* Trace record consumed by format_arp_term_input_trace(): raw packet bytes
   copied starting at the ARP header (the ARP path in arp_term_l2bd copies
   sizeof (ethernet_arp_input_trace_t) bytes from the L3 header). */
49 typedef struct
50 {
51   u8 packet_data[64];
52 } ethernet_arp_input_trace_t;
53
/* X-macro list of (symbol, description) pairs.  It is expanded twice below:
   once to build the ethernet_arp_reply_error_t enum and once to build the
   matching ethernet_arp_error_strings[] table, so the entry order defines the
   counter indices.  The first entry (replies_sent) therefore has value 0,
   which arp_term_l2bd also relies on as its "no error" value for error0. */
54 #define foreach_ethernet_arp_error                                      \
55   _ (replies_sent, "ARP replies sent")                                  \
56   _ (l2_type_not_ethernet, "L2 type not ethernet")                      \
57   _ (l3_type_not_ip4, "L3 type not IP4")                                \
58   _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
59   _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
60   _ (l3_dst_address_unset, "IP4 destination address is unset")          \
61   _ (l3_src_address_is_local, "IP4 source address matches local interface") \
62   _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
63   _ (replies_received, "ARP replies received")                          \
64   _ (opcode_not_request, "ARP opcode not request")                      \
65   _ (proxy_arp_replies_sent, "Proxy ARP replies sent")                  \
66   _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
67   _ (gratuitous_arp, "ARP probe or announcement dropped") \
68   _ (interface_no_table, "Interface is not mapped to an IP table") \
69   _ (interface_not_ip_enabled, "Interface is not IP enabled") \
70   _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \
71
/* Error/counter indices: one ETHERNET_ARP_ERROR_<sym> per entry of
   foreach_ethernet_arp_error, followed by ETHERNET_ARP_N_ERROR as the count
   (used as .n_errors in the node registration). */
72 typedef enum
73 {
74 #define _(sym,string) ETHERNET_ARP_ERROR_##sym,
75   foreach_ethernet_arp_error
76 #undef _
77     ETHERNET_ARP_N_ERROR,
78 } ethernet_arp_reply_error_t;
79
/* Human-readable counter descriptions, generated from the same list and in
   the same order as ethernet_arp_reply_error_t; installed as the node's
   .error_strings. */
80 static char *ethernet_arp_error_strings[] = {
81 #define _(sym,string) string,
82   foreach_ethernet_arp_error
83 #undef _
84 };
85
86 static u8 *
87 format_arp_term_input_trace (u8 * s, va_list * va)
88 {
89   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
90   CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
91   ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
92
93   /* arp-term trace data saved is either arp or ip6/icmp6 packet:
94      - for arp, the 1st 16-bit field is hw type of value of 0x0001.
95      - for ip6, the first nibble has value of 6. */
96   s = format (s, "%U", t->packet_data[0] == 0 ?
97               format_ethernet_arp_header : format_ip6_header,
98               t->packet_data, sizeof (t->packet_data));
99
100   return s;
101 }
102
103 void
104 l2_arp_term_set_publisher_node (bool on)
105 {
106   l2_arp_term_main_t *l2am = &l2_arp_term_main;
107
108   l2am->publish = on;
109 }
110
111 static int
112 l2_arp_term_publish (l2_arp_term_publish_event_t * ctx)
113 {
114   l2_arp_term_main_t *l2am = &l2_arp_term_main;
115
116   vec_add1 (l2am->publish_events, *ctx);
117
118   vlib_process_signal_event (vlib_get_main (),
119                              l2_arp_term_process_node.index,
120                              L2_ARP_TERM_EVENT_PUBLISH, 0);
121
122   return 0;
123 }
124
125 static inline void
126 l2_arp_term_publish_v4_dp (u32 sw_if_index,
127                            const ethernet_arp_ip4_over_ethernet_address_t * a)
128 {
129   l2_arp_term_main_t *l2am = &l2_arp_term_main;
130
131   if (!l2am->publish)
132     return;
133
134   l2_arp_term_publish_event_t args = {
135     .sw_if_index = sw_if_index,
136     .type = IP46_TYPE_IP4,
137     .ip.ip4 = a->ip4,
138     .mac = a->mac,
139   };
140
141   vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
142                                sizeof (args));
143 }
144
145 static inline void
146 l2_arp_term_publish_v6_dp (u32 sw_if_index,
147                            const ip6_address_t * addr,
148                            const mac_address_t * mac)
149 {
150   l2_arp_term_main_t *l2am = &l2_arp_term_main;
151
152   if (!l2am->publish)
153     return;
154
155   l2_arp_term_publish_event_t args = {
156     .sw_if_index = sw_if_index,
157     .type = IP46_TYPE_IP6,
158     .ip.ip6 = *addr,
159     .mac = *mac,
160   };
161
162   vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
163                                sizeof (args));
164 }
165
166 static inline int
167 vnet_ip6_nd_term (vlib_main_t * vm,
168                   vlib_node_runtime_t * node,
169                   vlib_buffer_t * p0,
170                   ethernet_header_t * eth,
171                   ip6_header_t * ip, u32 sw_if_index, u16 bd_index)
172 {
173   icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
174   mac_address_t mac;
175
176   mac_address_from_bytes (&mac, eth->src_address);
177   ndh = ip6_next_header (ip);
178   if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
179       ndh->icmp.type != ICMP6_neighbor_advertisement)
180     return 0;
181
182   if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
183                      (p0->flags & VLIB_BUFFER_IS_TRACED)))
184     {
185       u8 *t0 = vlib_add_trace (vm, node, p0,
186                                sizeof (icmp6_input_trace_t));
187       clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
188     }
189
190   /* Check if anyone want ND events for L2 BDs */
191   if (PREDICT_FALSE (!ip6_address_is_link_local_unicast (&ip->src_address)))
192     {
193       l2_arp_term_publish_v6_dp (sw_if_index, &ip->src_address, &mac);
194     }
195
196   /* Check if MAC entry exsist for solicited target IP */
197   if (ndh->icmp.type == ICMP6_neighbor_solicitation)
198     {
199       icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
200       l2_bridge_domain_t *bd_config;
201       u8 *macp;
202
203       opt = (void *) (ndh + 1);
204       if ((opt->header.type !=
205            ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
206           (opt->header.n_data_u64s != 1))
207         return 0;               /* source link layer address option not present */
208
209       bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
210       macp =
211         (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
212       if (macp)
213         {                       /* found ip-mac entry, generate eighbor advertisement response */
214           int bogus_length;
215           vlib_node_runtime_t *error_node =
216             vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
217           ip->dst_address = ip->src_address;
218           ip->src_address = ndh->target_address;
219           ip->hop_limit = 255;
220           opt->header.type =
221             ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
222           clib_memcpy (opt->ethernet_address, macp, 6);
223           ndh->icmp.type = ICMP6_neighbor_advertisement;
224           ndh->advertisement_flags = clib_host_to_net_u32
225             (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
226              ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
227           ndh->icmp.checksum = 0;
228           ndh->icmp.checksum =
229             ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
230           clib_memcpy (eth->dst_address, eth->src_address, 6);
231           clib_memcpy (eth->src_address, macp, 6);
232           vlib_error_count (vm, error_node->node_index,
233                             ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
234           return 1;
235         }
236     }
237
238   return 0;
239
240 }
241
/*
 * Node function of "arp-term-l2bd": ARP / IP6 ND termination for packets
 * received in an L2 bridge domain.
 *
 * Each buffer of the frame is handled one at a time:
 *  - buffers with l2.shg != 0, or flagged VNET_BUFFER_F_LOCALLY_ORIGINATED,
 *    are passed to the next L2 feature untouched;
 *  - ARP request/reply packets are validated (hardware type, protocol type,
 *    sender MAC vs. frame source MAC, sender IP not multicast); invalid ones
 *    are dropped with an error counter.  Valid ones are offered to
 *    l2_arp_term_publish_v4_dp(), and if the target IP has an entry in the
 *    bridge domain's mac_by_ip4 table (and the packet is not a GARP) the
 *    packet is rewritten in place into an ARP reply;
 *  - other packets go to check_ip6_nd: IP6/ICMP6 packets with a specified
 *    source address are given to vnet_ip6_nd_term(), which may rewrite them
 *    into a neighbor advertisement;
 *  - generated replies are sent to l2-output on the receiving interface, or,
 *    when that interface is a BVI, handed on with L2INPUT_FEAT_FWD set;
 *  - everything else continues to the next L2 feature.
 *
 * The number of ARP replies generated is added to the replies_sent counter.
 * Returns frame->n_vectors.
 */
242 static uword
243 arp_term_l2bd (vlib_main_t * vm,
244                vlib_node_runtime_t * node, vlib_frame_t * frame)
245 {
246   l2input_main_t *l2im = &l2input_main;
247   u32 n_left_from, next_index, *from, *to_next;
248   u32 n_replies_sent = 0;
249   u16 last_bd_index = ~0;
250   l2_bridge_domain_t *last_bd_config = 0;
251   l2_input_config_t *cfg0;
252
253   from = vlib_frame_vector_args (frame);
254   n_left_from = frame->n_vectors;
255   next_index = node->cached_next_index;
256
257   while (n_left_from > 0)
258     {
259       u32 n_left_to_next;
260
261       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
262
263       while (n_left_from > 0 && n_left_to_next > 0)
264         {
265           vlib_buffer_t *p0;
266           ethernet_header_t *eth0;
267           ethernet_arp_header_t *arp0;
268           ip6_header_t *iph0;
269           u8 *l3h0;
270           u32 pi0, error0, next0, sw_if_index0;
271           u16 ethertype0;
272           u16 bd_index0;
273           u32 ip0;
274           u8 *macp0;
275
276           pi0 = from[0];
277           to_next[0] = pi0;
278           from += 1;
279           to_next += 1;
280           n_left_from -= 1;
281           n_left_to_next -= 1;
282
283           p0 = vlib_get_buffer (vm, pi0);
284           // Terminate only local (SHG == 0) ARP
285           if (vnet_buffer (p0)->l2.shg != 0)
286             goto next_l2_feature;
287
288           eth0 = vlib_buffer_get_current (p0);
          /* The L3 header starts l2_len bytes into the frame; the ethertype
             is read from the two bytes immediately preceding it. */
289           l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
290           ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
291           arp0 = (ethernet_arp_header_t *) l3h0;
292
293           if (p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
294             goto next_l2_feature;
295
296           if (ethertype0 != ETHERNET_TYPE_ARP)
297             goto check_ip6_nd;
298
299           if ((arp0->opcode !=
300                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
301               (arp0->opcode !=
302                clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
303             goto check_ip6_nd;
304
305           /* Must be ARP request/reply packet here */
306           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
307                              (p0->flags & VLIB_BUFFER_IS_TRACED)))
308             {
309               u8 *t0 = vlib_add_trace (vm, node, p0,
310                                        sizeof (ethernet_arp_input_trace_t));
311               clib_memcpy_fast (t0, l3h0,
312                                 sizeof (ethernet_arp_input_trace_t));
313             }
314
          /* error0 stays 0 unless a check fails; when both checks fail the
             later one (l3_type_not_ip4) wins. */
315           error0 = 0;
316           error0 =
317             (arp0->l2_type !=
318              clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
319              ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
320           error0 =
321             (arp0->l3_type !=
322              clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
323              ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
324
325           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
326
327           if (error0)
328             goto drop;
329
330           /* Trash ARP packets whose ARP-level source addresses do not
331              match, or if requester address is mcast */
332           if (PREDICT_FALSE
333               (!ethernet_mac_address_equal (eth0->src_address,
334                                             arp0->ip4_over_ethernet[0].
335                                             mac.bytes))
336               || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
337             {
338               /* VRRP virtual MAC may be different to SMAC in ARP reply */
339               if (clib_memcmp (arp0->ip4_over_ethernet[0].mac.bytes,
340                                vrrp_prefix, sizeof (vrrp_prefix)) != 0)
341                 {
342                   error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
343                   goto drop;
344                 }
345             }
346           if (PREDICT_FALSE
347               (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
348             {
349               error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
350               goto drop;
351             }
352
353           /* Check if anyone want ARP request events for L2 BDs */
354           l2_arp_term_publish_v4_dp (sw_if_index0,
355                                      &arp0->ip4_over_ethernet[0]);
356
357           /* lookup BD mac_by_ip4 hash table for MAC entry */
358           ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
359           bd_index0 = vnet_buffer (p0)->l2.bd_index;
          /* Re-fetch the bridge domain config only when the BD index differs
             from the previous packet's (or nothing has been cached yet). */
360           if (PREDICT_FALSE ((bd_index0 != last_bd_index)
361                              || (last_bd_index == (u16) ~ 0)))
362             {
363               last_bd_index = bd_index0;
364               last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
365             }
366           macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
367
368           if (PREDICT_FALSE (!macp0))
369             goto next_l2_feature;       /* MAC not found */
370           if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
371                              arp0->ip4_over_ethernet[1].ip4.as_u32))
372             goto next_l2_feature;       /* GARP */
373
374           /* MAC found, send ARP reply -
375              Convert ARP request packet to ARP reply */
376           arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
377           arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
378           arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
379           mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
380           clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
381           clib_memcpy_fast (eth0->src_address, macp0, 6);
382           n_replies_sent += 1;
383
384         output_response:
385           /* For BVI, need to use l2-fwd node to send ARP reply as
386              l2-output node cannot output packet to BVI properly */
387           cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
388           if (PREDICT_FALSE (l2_input_is_bvi (cfg0)))
389             {
390               vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
391               vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
392               goto next_l2_feature;
393             }
394
395           /* Send ARP/ND reply back out input interface through l2-output */
396           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
397           next0 = ARP_TERM_NEXT_L2_OUTPUT;
398           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
399                                            to_next, n_left_to_next, pi0,
400                                            next0);
401           continue;
402
403         check_ip6_nd:
404           /* IP6 ND event notification or solicitation handling to generate
405              local response instead of flooding */
406           iph0 = (ip6_header_t *) l3h0;
407           if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
408                              iph0->protocol == IP_PROTOCOL_ICMP6 &&
409                              !ip6_address_is_unspecified
410                              (&iph0->src_address)))
411             {
412               sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
413               if (vnet_ip6_nd_term
414                   (vm, node, p0, eth0, iph0, sw_if_index0,
415                    vnet_buffer (p0)->l2.bd_index))
416                 goto output_response;
417             }
418
419         next_l2_feature:
420           {
421             next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
422                                           L2INPUT_FEAT_ARP_TERM);
423             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
424                                              to_next, n_left_to_next,
425                                              pi0, next0);
426             continue;
427           }
428
429         drop:
          /* A zero sender IP, or sender IP equal to target IP, overrides the
             error chosen above and is counted as gratuitous_arp instead. */
430           if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
431               (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
432                arp0->ip4_over_ethernet[1].ip4.as_u32))
433             {
434               error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
435             }
436           next0 = ARP_TERM_NEXT_DROP;
437           p0->error = node->errors[error0];
438
439           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
440                                            to_next, n_left_to_next, pi0,
441                                            next0);
442         }
443
444       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
445     }
446
447   vlib_error_count (vm, node->node_index,
448                     ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
449   return frame->n_vectors;
450 }
451
/* Graph node registration for "arp-term-l2bd": runs arp_term_l2bd() on
   vectors of u32 entries, exposes the ethernet_arp error counters, and
   declares the two next nodes indexed by arp_term_next_t. */
452 VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
453   .function = arp_term_l2bd,
454   .name = "arp-term-l2bd",
455   .vector_size = sizeof (u32),
456   .n_errors = ETHERNET_ARP_N_ERROR,
457   .error_strings = ethernet_arp_error_strings,
458   .n_next_nodes = ARP_TERM_N_NEXT,
459   .next_nodes = {
460     [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
461     [ARP_TERM_NEXT_DROP] = "error-drop",
462   },
463   .format_buffer = format_ethernet_arp_header,
464   .format_trace = format_arp_term_input_trace,
465 };
466
467 clib_error_t *
468 arp_term_init (vlib_main_t * vm)
469 {
470   // Initialize the feature next-node indexes
471   feat_bitmap_init_next_nodes (vm,
472                                arp_term_l2bd_node.index,
473                                L2INPUT_N_FEAT,
474                                l2input_get_feat_names (),
475                                arp_term_next_node_index);
476   return 0;
477 }
478
479 VLIB_INIT_FUNCTION (arp_term_init);
480
481 /*
482  * fd.io coding-style-patch-verification: ON
483  *
484  * Local Variables:
485  * eval: (c-set-style "gnu")
486  * End:
487  */