2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/icmp4.c: ipv4 icmp
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/pg/pg.h>
/* Error string table for the ICMPv4 nodes; node registrations in this file
   pass it as .error_strings with .n_errors = ARRAY_LEN (icmp_error_strings). */
45 static char *icmp_error_strings[] = {
/* Format an ICMPv4 type and code as text.
   Varargs: the ICMP type, then the ICMP code (both fetched as int).
   The fallback text for a type without a name is "unknown 0x%x"; a named type
   is printed by name, and a code name found under the key (type << 8) | code
   is appended after a single space. */
52 format_ip4_icmp_type_and_code (u8 * s, va_list * args)
54 icmp4_type_t type = va_arg (*args, int);
55 u8 code = va_arg (*args, int);
/* X-macro: each (value, name) pair expands to a case that stringifies the name. */
58 #define _(n,f) case n: t = #f; break;
71 return format (s, "unknown 0x%x", type);
73 s = format (s, "%s", t);
/* Type in the high byte, code in the low byte, so one switch covers the pair. */
76 switch ((type << 8) | code)
78 #define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
86 s = format (s, " %s", t);
/* Format an ICMP header.
   Varargs: icmp46_header_t * (the header) and u32 (bytes available at that
   pointer).  When fewer bytes than one header are available the output is
   "ICMP header truncated"; otherwise the type/code text and the checksum are
   printed. */
92 format_ip4_icmp_header (u8 * s, va_list * args)
94 icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
95 u32 max_header_bytes = va_arg (*args, u32);
98 if (max_header_bytes < sizeof (icmp[0]))
99 return format (s, "ICMP header truncated");
/* The checksum is converted from network to host byte order for display. */
101 s = format (s, "ICMP %U checksum 0x%x",
102 format_ip4_icmp_type_and_code, icmp->type, icmp->code,
103 clib_net_to_host_u16 (icmp->checksum));
/* Trace formatter used by the ICMPv4 nodes in this file.
   Varargs: vlib_main_t * and vlib_node_t * (both consumed but unused), then
   the icmp_input_trace_t * record.  The captured packet_data, together with
   its size, is handed to format_ip4_header. */
109 format_icmp_input_trace (u8 * s, va_list * va)
111 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
112 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
113 icmp_input_trace_t *t = va_arg (*va, icmp_input_trace_t *);
116 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Next index of ip4-icmp-input that the per-type dispatch table is filled with
   at init; the node registration maps it to "ip4-punt". */
123 ICMP_INPUT_NEXT_ERROR,
/* Name lookup whose values unformat_icmp_type_and_code decodes as
   type = bits 8..15, code = bits 0..7. */
129 uword *type_and_code_by_name;
133 /* Vector dispatch table indexed by [icmp type]. */
134 u8 ip4_input_next_index_by_type[256];
/* Global ICMPv4 state instance used throughout this file. */
137 icmp4_main_t icmp4_main;
/* Graph node function for "ip4-icmp-input".
   For each buffer in the frame, selects the next node from
   icmp4_main.ip4_input_next_index_by_type and enqueues the buffer there.
   Returns frame->n_vectors. */
140 ip4_icmp_input (vlib_main_t * vm,
141 vlib_node_runtime_t * node, vlib_frame_t * frame)
143 icmp4_main_t *im = &icmp4_main;
144 uword n_packets = frame->n_vectors;
146 u32 n_left_from, n_left_to_next, next;
148 from = vlib_frame_vector_args (frame);
149 n_left_from = n_packets;
150 next = node->cached_next_index;
/* Capture trace records for the whole frame when tracing is enabled on this node. */
152 if (node->flags & VLIB_NODE_FLAG_TRACE)
153 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
155 sizeof (icmp_input_trace_t));
157 while (n_left_from > 0)
159 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
161 while (n_left_from > 0 && n_left_to_next > 0)
165 icmp46_header_t *icmp0;
/* Look ahead: prefetch the buffer two slots on and the packet data of the next one. */
169 if (PREDICT_TRUE (n_left_from > 2))
171 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
172 p0 = vlib_get_buffer (vm, from[1]);
173 ip0 = vlib_buffer_get_current (p0);
174 CLIB_PREFETCH (ip0, CLIB_CACHE_LINE_BYTES, LOAD);
/* Copy the buffer index into the frame currently being filled before next0 is known. */
177 bi0 = to_next[0] = from[0];
184 p0 = vlib_get_buffer (vm, bi0);
185 ip0 = vlib_buffer_get_current (p0);
186 icmp0 = ip4_next_header (ip0);
/* Per-type dispatch: the table entry is the next index for this packet. */
188 next0 = im->ip4_input_next_index_by_type[type0];
/* Tag the buffer with the unknown-type error. */
190 p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
/* The chosen next differs from the frame being filled: hand that frame back and
   fetch one for the new next.  NOTE(review): the "+ 1" presumably returns the
   slot taken by the to_next[0] write above -- confirm. */
191 if (PREDICT_FALSE (next0 != next))
193 vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
195 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
202 vlib_put_next_frame (vm, node, next, n_left_to_next);
205 return frame->n_vectors;
/* Registration of the "ip4-icmp-input" node: buffer-index (u32) vectors,
   the shared ICMP trace formatter and error strings, and "ip4-punt" as the
   ICMP_INPUT_NEXT_ERROR next node. */
209 VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
210 .function = ip4_icmp_input,
211 .name = "ip4-icmp-input",
213 .vector_size = sizeof (u32),
215 .format_trace = format_icmp_input_trace,
217 .n_errors = ARRAY_LEN (icmp_error_strings),
218 .error_strings = icmp_error_strings,
222 [ICMP_INPUT_NEXT_ERROR] = "ip4-punt",
/* Graph node function for "ip4-icmp-echo-request".
   Rewrites every echo request in place into an echo reply: ICMP type changed,
   IP source and destination swapped, TTL set from ip4_main.host_config.ttl and
   a new fragment id taken from the random buffer, with both checksums updated
   incrementally.  Counts ICMP4_ERROR_ECHO_REPLIES_SENT for the whole frame and
   returns frame->n_vectors. */
228 ip4_icmp_echo_request (vlib_main_t * vm,
229 vlib_node_runtime_t * node, vlib_frame_t * frame)
231 uword n_packets = frame->n_vectors;
233 u32 n_left_from, n_left_to_next, next;
234 ip4_main_t *i4m = &ip4_main;
235 u16 *fragment_ids, *fid;
236 u8 host_config_ttl = i4m->host_config.ttl;
238 from = vlib_frame_vector_args (frame);
239 n_left_from = n_packets;
240 next = node->cached_next_index;
/* Capture trace records for the whole frame when tracing is enabled on this node. */
242 if (node->flags & VLIB_NODE_FLAG_TRACE)
243 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
245 sizeof (icmp_input_trace_t));
247 /* Get random fragment IDs for replies. */
248 fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
250 sizeof (fragment_ids[0]));
252 while (n_left_from > 0)
254 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two packets (suffix 0 and 1) are handled per iteration. */
256 while (n_left_from > 2 && n_left_to_next > 2)
258 vlib_buffer_t *p0, *p1;
259 ip4_header_t *ip0, *ip1;
260 icmp46_header_t *icmp0, *icmp1;
263 ip_csum_t sum0, sum1;
265 bi0 = to_next[0] = from[0];
266 bi1 = to_next[1] = from[1];
273 p0 = vlib_get_buffer (vm, bi0);
274 p1 = vlib_get_buffer (vm, bi1);
275 ip0 = vlib_buffer_get_current (p0);
276 ip1 = vlib_buffer_get_current (p1);
277 icmp0 = ip4_next_header (ip0);
278 icmp1 = ip4_next_header (ip1);
/* Replies are marked as received on the local interface. */
280 vnet_buffer (p0)->sw_if_index[VLIB_RX] =
281 vnet_main.local_interface_sw_if_index;
282 vnet_buffer (p1)->sw_if_index[VLIB_RX] =
283 vnet_main.local_interface_sw_if_index;
285 /* Update ICMP checksum. */
286 sum0 = icmp0->checksum;
287 sum1 = icmp1->checksum;
/* Only echo requests are expected here; the checksum delta below relies on it. */
289 ASSERT (icmp0->type == ICMP4_echo_request);
290 ASSERT (icmp1->type == ICMP4_echo_request);
291 sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
292 icmp46_header_t, type);
293 sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
294 icmp46_header_t, type);
295 icmp0->type = ICMP4_echo_reply;
296 icmp1->type = ICMP4_echo_reply;
298 icmp0->checksum = ip_csum_fold (sum0);
299 icmp1->checksum = ip_csum_fold (sum1);
301 src0 = ip0->src_address.data_u32;
302 src1 = ip1->src_address.data_u32;
303 dst0 = ip0->dst_address.data_u32;
304 dst1 = ip1->dst_address.data_u32;
306 /* Swap source and destination address.
307 Does not change checksum. */
308 ip0->src_address.data_u32 = dst0;
309 ip1->src_address.data_u32 = dst1;
310 ip0->dst_address.data_u32 = src0;
311 ip1->dst_address.data_u32 = src1;
313 /* Update IP checksum. */
314 sum0 = ip0->checksum;
315 sum1 = ip1->checksum;
/* The checksum delta is computed from the old TTL before the field is overwritten. */
317 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
319 sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
321 ip0->ttl = host_config_ttl;
322 ip1->ttl = host_config_ttl;
324 /* New fragment id. */
325 sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
326 ip4_header_t, fragment_id);
327 sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
328 ip4_header_t, fragment_id);
329 ip0->fragment_id = fid[0];
330 ip1->fragment_id = fid[1];
333 ip0->checksum = ip_csum_fold (sum0);
334 ip1->checksum = ip_csum_fold (sum1);
/* Debug check: the incrementally updated checksum must equal a full recompute. */
336 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
337 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
339 p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
340 p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
/* Single loop: same rewrite as above, one packet per iteration. */
343 while (n_left_from > 0 && n_left_to_next > 0)
347 icmp46_header_t *icmp0;
351 bi0 = to_next[0] = from[0];
358 p0 = vlib_get_buffer (vm, bi0);
359 ip0 = vlib_buffer_get_current (p0);
360 icmp0 = ip4_next_header (ip0);
362 vnet_buffer (p0)->sw_if_index[VLIB_RX] =
363 vnet_main.local_interface_sw_if_index;
365 /* Update ICMP checksum. */
366 sum0 = icmp0->checksum;
368 ASSERT (icmp0->type == ICMP4_echo_request);
369 sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
370 icmp46_header_t, type);
371 icmp0->type = ICMP4_echo_reply;
372 icmp0->checksum = ip_csum_fold (sum0);
/* Swap source and destination address. */
374 src0 = ip0->src_address.data_u32;
375 dst0 = ip0->dst_address.data_u32;
376 ip0->src_address.data_u32 = dst0;
377 ip0->dst_address.data_u32 = src0;
379 /* Update IP checksum. */
380 sum0 = ip0->checksum;
382 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
384 ip0->ttl = host_config_ttl;
/* New fragment id. */
386 sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
387 ip4_header_t, fragment_id);
388 ip0->fragment_id = fid[0];
391 ip0->checksum = ip_csum_fold (sum0);
393 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
395 p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
398 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* The counter is charged to the ip4-icmp-input node's index, not to this node's. */
401 vlib_error_count (vm, ip4_icmp_input_node.index,
402 ICMP4_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
404 return frame->n_vectors;
/* Registration of the "ip4-icmp-echo-request" node: buffer-index (u32)
   vectors, the shared ICMP trace formatter, and "ip4-load-balance" as next
   node 0. */
408 VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
409 .function = ip4_icmp_echo_request,
410 .name = "ip4-icmp-echo-request",
412 .vector_size = sizeof (u32),
414 .format_trace = format_icmp_input_trace,
418 [0] = "ip4-load-balance",
/* Next-node indices of "ip4-icmp-error"; its registration maps DROP to
   "ip4-drop" and LOOKUP to "ip4-lookup", and uses N_NEXT as the node count. */
425 IP4_ICMP_ERROR_NEXT_DROP,
426 IP4_ICMP_ERROR_NEXT_LOOKUP,
427 IP4_ICMP_ERROR_N_NEXT,
428 } ip4_icmp_error_next_t;
/* Store the ICMP type, code and 32-bit data word in buffer b's ip.icmp
   metadata; ip4_icmp_error reads these fields when building the ICMP header. */
431 icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
433 vnet_buffer (b)->ip.icmp.type = type;
434 vnet_buffer (b)->ip.icmp.code = code;
435 vnet_buffer (b)->ip.icmp.data = data;
/* Map an ICMP type to the matching "sent" error counter:
   destination unreachable, time exceeded and parameter problem each have their
   own counter; ICMP4_ERROR_DROP is the remaining return value. */
439 icmp4_icmp_type_to_error (u8 type)
443 case ICMP4_destination_unreachable:
444 return ICMP4_ERROR_DEST_UNREACH_SENT;
445 case ICMP4_time_exceeded:
446 return ICMP4_ERROR_TTL_EXPIRE_SENT;
447 case ICMP4_parameter_problem:
448 return ICMP4_ERROR_PARAM_PROBLEM_SENT;
450 return ICMP4_ERROR_DROP;
/* Graph node function for "ip4-icmp-error".
   Turns each offending packet into an ICMP error message: a new IPv4 header,
   an ICMP header and a 4 byte data word are prepended in front of the original
   packet, with type/code/data taken from the buffer's ip.icmp metadata.  The
   result goes to IP4_ICMP_ERROR_NEXT_LOOKUP, or to IP4_ICMP_ERROR_NEXT_DROP
   when the receiving interface has no IPv4 address.  Returns frame->n_vectors. */
455 ip4_icmp_error (vlib_main_t * vm,
456 vlib_node_runtime_t * node, vlib_frame_t * frame)
459 uword n_left_from, n_left_to_next;
460 ip4_icmp_error_next_t next_index;
461 ip4_main_t *im = &ip4_main;
462 ip_lookup_main_t *lm = &im->lookup_main;
464 from = vlib_frame_vector_args (frame);
465 n_left_from = frame->n_vectors;
466 next_index = node->cached_next_index;
/* Capture trace records for the whole frame when tracing is enabled on this node. */
468 if (node->flags & VLIB_NODE_FLAG_TRACE)
469 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
471 sizeof (icmp_input_trace_t));
473 while (n_left_from > 0)
475 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
477 while (n_left_from > 0 && n_left_to_next > 0)
/* Defaults: forward to lookup with no error recorded yet. */
480 u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
481 u8 error0 = ICMP4_ERROR_NONE;
483 ip4_header_t *ip0, *out_ip0;
484 icmp46_header_t *icmp0;
485 u32 sw_if_index0, if_add_index0;
488 /* Speculatively enqueue p0 to the current next frame */
495 p0 = vlib_get_buffer (vm, pi0);
496 ip0 = vlib_buffer_get_current (p0);
497 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
500 * RFC1812 says to keep as much of the original packet as
501 * possible within the minimum MTU (576). We cheat "a little"
502 * here by keeping whatever fits in the first buffer, to be more
505 if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
507 /* clear current_length of all other buffers in chain */
508 vlib_buffer_t *b = p0;
509 p0->total_length_not_including_first_buffer = 0;
510 while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
512 b = vlib_get_buffer (vm, b->next_buffer);
513 b->current_length = 0;
517 /* Add IP header and ICMPv4 header including a 4 byte data field */
518 vlib_buffer_advance (p0,
519 -sizeof (ip4_header_t) -
520 sizeof (icmp46_header_t) - 4);
/* Length capped at 576 bytes. */
523 p0->current_length > 576 ? 576 : p0->current_length;
/* The new IP header sits at the new buffer start, the ICMP header right behind it. */
525 out_ip0 = vlib_buffer_get_current (p0);
526 icmp0 = (icmp46_header_t *) & out_ip0[1];
528 /* Fill ip header fields */
529 out_ip0->ip_version_and_header_length = 0x45;
531 out_ip0->length = clib_host_to_net_u16 (p0->current_length);
532 out_ip0->fragment_id = 0;
533 out_ip0->flags_and_fragment_offset = 0;
535 out_ip0->protocol = IP_PROTOCOL_ICMP;
/* The error message is addressed to the source of the offending packet. */
536 out_ip0->dst_address = ip0->src_address;
/* Source address: the address found through the RX interface's entry in
   if_address_pool_index_by_sw_if_index. */
538 if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
541 lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
542 if (PREDICT_TRUE (if_add_index0 != ~0))
544 ip_interface_address_t *if_add =
545 pool_elt_at_index (lm->if_address_pool, if_add_index0);
546 ip4_address_t *if_ip =
547 ip_interface_address_get_address (lm, if_add);
548 out_ip0->src_address = *if_ip;
552 /* interface has no IP4 address - should not happen */
553 next0 = IP4_ICMP_ERROR_NEXT_DROP;
554 error0 = ICMP4_ERROR_DROP;
556 out_ip0->checksum = ip4_header_checksum (out_ip0);
558 /* Fill icmp header fields */
559 icmp0->type = vnet_buffer (p0)->ip.icmp.type;
560 icmp0->code = vnet_buffer (p0)->ip.icmp.code;
/* The 4 byte data word directly follows the ICMP header, in network byte order. */
561 *((u32 *) (icmp0 + 1)) =
562 clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
565 ip_incremental_checksum (0, icmp0,
567 sizeof (ip4_header_t));
568 icmp0->checksum = ~ip_csum_fold (sum);
570 /* Update error status */
571 if (error0 == ICMP4_ERROR_NONE)
572 error0 = icmp4_icmp_type_to_error (icmp0->type);
573 vlib_error_count (vm, node->node_index, error0, 1);
575 /* Verify speculative enqueue, maybe switch current next frame */
576 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
577 to_next, n_left_to_next,
580 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
583 return frame->n_vectors;
/* Registration of the "ip4-icmp-error" node: buffer-index (u32) vectors, the
   shared ICMP error strings and trace formatter, and two next nodes
   ("ip4-drop" and "ip4-lookup"). */
587 VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
588 .function = ip4_icmp_error,
589 .name = "ip4-icmp-error",
590 .vector_size = sizeof (u32),
592 .n_errors = ARRAY_LEN (icmp_error_strings),
593 .error_strings = icmp_error_strings,
595 .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
597 [IP4_ICMP_ERROR_NEXT_DROP] = "ip4-drop",
598 [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
601 .format_trace = format_icmp_input_trace,
/* Parse an ICMP type (and optionally code) given by name.
   Vararg: icmp46_header_t * whose type and code fields receive the result.
   The combined type-and-code name table is tried first, then the type-only
   name table. */
607 unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
609 icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
610 icmp4_main_t *cm = &icmp4_main;
613 if (unformat_user (input, unformat_vlib_number_by_name,
614 cm->type_and_code_by_name, &i))
/* Combined value: type in bits 8..15, code in bits 0..7. */
616 h->type = (i >> 8) & 0xff;
617 h->code = (i >> 0) & 0xff;
619 else if (unformat_user (input, unformat_vlib_number_by_name,
620 cm->type_by_name, &i))
/* Packet-generator edit function for ICMP: for each generated packet, computes
   the ICMP checksum over the bytes from the ICMP header onwards.
   g is the ICMP edit group; the group before it, (g - 1), is used as the IP
   header group. */
632 icmp4_pg_edit_function (pg_main_t * pg,
634 pg_edit_group_t * g, u32 * packets, u32 n_packets)
636 vlib_main_t *vm = vlib_get_main ();
637 u32 ip_offset, icmp_offset;
/* Byte offsets of the ICMP and IP headers, taken from the edit groups. */
639 icmp_offset = g->start_byte_offset;
640 ip_offset = (g - 1)->start_byte_offset;
642 while (n_packets >= 1)
646 icmp46_header_t *icmp0;
649 p0 = vlib_get_buffer (vm, packets[0]);
/* The offsets are applied to p0->data directly, hence the current_data == 0 requirement. */
653 ASSERT (p0->current_data == 0);
654 ip0 = (void *) (p0->data + ip_offset);
655 icmp0 = (void *) (p0->data + icmp_offset);
657 /* If the IP length has not been specified (is zero), calculate the length from the buffer chain; otherwise use the length field of the IP header. */
658 if (ip0->length == 0)
659 len0 = vlib_buffer_length_in_chain (vm, p0) - icmp_offset;
/* NOTE(review): icmp_offset is subtracted from the IP header's length field here,
   the same offset the buffer-length case uses -- confirm this gives the ICMP
   length when ip_offset is non-zero. */
661 len0 = clib_net_to_host_u16 (ip0->length) - icmp_offset;
664 ~ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
/* Packet-generator edit descriptors for the ICMP header's type and code fields. */
670 pg_edit_t type, code;
672 } pg_icmp46_header_t;
/* Initialise the pg edits in p; the helper macro calls pg_edit_init for a
   named field f of icmp46_header_t. */
675 pg_icmp_header_init (pg_icmp46_header_t * p)
677 /* Initialize fields that are not bit fields in the IP header. */
678 #define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
/* Packet-generator parser for an ICMP header.
   Vararg: pg_stream_t * that receives a new edit group sized for
   icmp46_header_t.  Accepts "ICMP <type-and-code>" followed by an optional
   "checksum <value>" and then the payload.  When no checksum was given,
   icmp4_pg_edit_function is installed on the group. */
686 unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
688 pg_stream_t *s = va_arg (*args, pg_stream_t *);
689 pg_icmp46_header_t *p;
692 p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
694 pg_icmp_header_init (p);
/* Start with the checksum unspecified; it stays that way unless the input sets it. */
696 p->checksum.type = PG_EDIT_UNSPECIFIED;
701 if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
/* Type and code are fixed values for every generated packet. */
704 pg_edit_set_fixed (&p->type, tmp.type);
705 pg_edit_set_fixed (&p->code, tmp.code);
711 if (unformat (input, "checksum %U",
712 unformat_pg_edit, unformat_pg_number, &p->checksum))
715 /* Can't parse input: try next protocol level. */
720 if (!unformat_user (input, unformat_pg_payload, s))
/* No explicit checksum: install the edit function that computes it per packet. */
723 if (p->checksum.type == PG_EDIT_UNSPECIFIED)
725 pg_edit_group_t *g = pg_stream_get_group (s, group_index);
726 g->edit_function = icmp4_pg_edit_function;
727 g->edit_function_opaque = 0;
733 /* Free up any edits we may have added. */
734 pg_free_edit_group (s);
/* Route ICMP packets of the given type from ip4-icmp-input to node_index:
   the node is added as a next of ip4_icmp_input_node and the resulting next
   index is stored in the per-type dispatch table. */
739 ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index)
741 icmp4_main_t *im = &icmp4_main;
/* Debug check that type is a valid index into the table. */
743 ASSERT ((int) type < ARRAY_LEN (im->ip4_input_next_index_by_type));
744 im->ip4_input_next_index_by_type[type]
745 = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
/* ICMPv4 module init.
   Calls ip_main_init first, registers the ICMP header formatter and
   packet-generator parser for IP_PROTOCOL_ICMP, builds the two name lookup
   tables, fills the per-type dispatch table with ICMP_INPUT_NEXT_ERROR and
   then routes echo requests to the echo-request node. */
748 static clib_error_t *
749 icmp4_init (vlib_main_t * vm)
751 ip_main_t *im = &ip_main;
752 ip_protocol_info_t *pi;
753 icmp4_main_t *cm = &icmp4_main;
756 error = vlib_call_init_function (vm, ip_main_init);
761 pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
762 pi->format_header = format_ip4_icmp_header;
763 pi->unformat_pg_edit = unformat_pg_icmp_header;
/* Type name -> type value. */
765 cm->type_by_name = hash_create_string (0, sizeof (uword));
766 #define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
/* Code name -> (type << 8) | code.  These entries must go into
   type_and_code_by_name, the table unformat_icmp_type_and_code decodes with
   ">> 8" and "& 0xff"; inserting them into type_by_name left this table empty
   and put values above 255 into the type-only table. */
770 cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
771 #define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP4_##a << 8));
/* Every type defaults to the error next until a handler is registered. */
775 clib_memset (cm->ip4_input_next_index_by_type,
776 ICMP_INPUT_NEXT_ERROR,
777 sizeof (cm->ip4_input_next_index_by_type));
779 ip4_icmp_register_type (vm, ICMP4_echo_request,
780 ip4_icmp_echo_request_node.index);
785 VLIB_INIT_FUNCTION (icmp4_init);
788 * fd.io coding-style-patch-verification: ON
791 * eval: (c-set-style "gnu")