2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/icmp4.c: ipv4 icmp
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/pg/pg.h>
/* String table for the ICMP error counters.  The node registrations in this
   file pass it as .error_strings, with ARRAY_LEN() of it as .n_errors. */
45 static char * icmp_error_strings[] = {
/* Formatter: appends a textual form of an ICMPv4 type (and of its code, when
   a name is known for the pair) to s and returns the extended vector.
   va_list arguments, in order: the ICMP type, then the ICMP code; both are
   fetched as int. */
51 static u8 * format_ip4_icmp_type_and_code (u8 * s, va_list * args)
53 icmp4_type_t type = va_arg (*args, int);
54 u8 code = va_arg (*args, int);
/* Each expansion of _ becomes a switch case mapping type value n to the
   stringified name f. */
57 #define _(n,f) case n: t = #f; break;
/* Fallback output: the raw type value in hex.
   NOTE(review): presumably reached only when no case above matched. */
70 return format (s, "unknown 0x%x", type);
72 s = format (s, "%s", t);
/* Second lookup is keyed on the combined value (type << 8) | code. */
75 switch ((type << 8) | code)
77 #define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
/* The code name is appended after the type name, separated by one space. */
85 s = format (s, " %s", t);
/* Formatter for an ICMP header; installed as the protocol's format_header
   callback by icmp4_init.
   va_list arguments, in order: pointer to the icmp46_header_t, then the
   number of header bytes available (u32). */
90 static u8 * format_ip4_icmp_header (u8 * s, va_list * args)
92 icmp46_header_t * icmp = va_arg (*args, icmp46_header_t *);
93 u32 max_header_bytes = va_arg (*args, u32);
/* Do not read the header when fewer bytes than its size are available. */
96 if (max_header_bytes < sizeof (icmp[0]))
97 return format (s, "ICMP header truncated");
/* Type and code are printed by name; the checksum is converted from network
   to host byte order before being printed in hex. */
99 s = format (s, "ICMP %U checksum 0x%x",
100 format_ip4_icmp_type_and_code, icmp->type, icmp->code,
101 clib_net_to_host_u16 (icmp->checksum));
/* Trace formatter shared by the ICMP nodes in this file (.format_trace).
   va_list arguments, in order: vlib_main_t * and vlib_node_t * (both unused),
   then the icmp_input_trace_t record to print. */
106 static u8 * format_icmp_input_trace (u8 * s, va_list * va)
108 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
109 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
110 icmp_input_trace_t * t = va_arg (*va, icmp_input_trace_t *);
/* The captured bytes and the capacity of the capture array are handed to the
   formatter call this line belongs to. */
114 t->packet_data, sizeof (t->packet_data));
/* Next index of ip4-icmp-input that is wired to "error-punt"; icmp4_init
   fills the whole per-type dispatch table with it before handlers register. */
120 ICMP_INPUT_NEXT_ERROR,
/* Name hash consulted first by unformat_icmp_type_and_code; a hit yields a
   combined value that is split into type (bits 15..8) and code (bits 7..0). */
125 uword * type_and_code_by_name;
/* Name hash keyed by ICMP type name; consulted when the combined lookup
   above does not match. */
127 uword * type_by_name;
129 /* Vector dispatch table indexed by [icmp type]. */
130 u8 ip4_input_next_index_by_type[256];
/* Global ICMPv4 state used by the functions in this file. */
133 icmp4_main_t icmp4_main;
/* Node function of ip4-icmp-input: walks the buffer indices of the frame and
   selects each packet's next node from the per-type dispatch table in
   icmp4_main.  Returns the number of vectors in the frame. */
136 ip4_icmp_input (vlib_main_t * vm,
137 vlib_node_runtime_t * node,
138 vlib_frame_t * frame)
140 icmp4_main_t * im = &icmp4_main;
141 uword n_packets = frame->n_vectors;
142 u32 * from, * to_next;
143 u32 n_left_from, n_left_to_next, next;
145 from = vlib_frame_vector_args (frame);
146 n_left_from = n_packets;
/* Start with the next index cached on the node runtime. */
147 next = node->cached_next_index;
/* Capture trace records for the frame only when tracing is enabled. */
149 if (node->flags & VLIB_NODE_FLAG_TRACE)
150 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
152 sizeof (icmp_input_trace_t));
154 while (n_left_from > 0)
156 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
158 while (n_left_from > 0 && n_left_to_next > 0)
162 icmp46_header_t * icmp0;
/* While more than two packets remain, prefetch the buffer at from[2] and the
   first cache line of packet data of the buffer at from[1]. */
166 if (PREDICT_TRUE (n_left_from > 2))
168 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
169 p0 = vlib_get_buffer (vm, from[1]);
170 ip0 = vlib_buffer_get_current (p0);
171 CLIB_PREFETCH(ip0, CLIB_CACHE_LINE_BYTES, LOAD);
/* Enqueue the buffer to the current next frame before its real next index is
   known; corrected below if the guess turns out wrong. */
174 bi0 = to_next[0] = from[0];
181 p0 = vlib_get_buffer (vm, bi0);
182 ip0 = vlib_buffer_get_current (p0);
183 icmp0 = ip4_next_header (ip0);
/* Per-type dispatch: one table lookup gives the next index. */
185 next0 = im->ip4_input_next_index_by_type[type0];
/* NOTE(review): the unknown-type error is attached to the buffer here;
   presumably it only matters when next0 is the error next -- confirm. */
187 p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
/* Wrong guess: hand the current frame back with one extra free slot (the slot
   used by the speculative enqueue above) and fetch a frame again. */
188 if (PREDICT_FALSE (next0 != next))
190 vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
192 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
199 vlib_put_next_frame (vm, node, next, n_left_to_next);
202 return frame->n_vectors;
/* Registration of the "ip4-icmp-input" graph node.  Its error counters use
   icmp_error_strings, and its ICMP_INPUT_NEXT_ERROR next is "error-punt". */
205 VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
206 .function = ip4_icmp_input,
207 .name = "ip4-icmp-input",
/* Frame vector elements are u32 values. */
209 .vector_size = sizeof (u32),
211 .format_trace = format_icmp_input_trace,
213 .n_errors = ARRAY_LEN (icmp_error_strings),
214 .error_strings = icmp_error_strings,
218 [ICMP_INPUT_NEXT_ERROR] = "error-punt",
/* Node function of ip4-icmp-echo-request: rewrites every echo request in
   place into an echo reply.  Per packet it changes the ICMP type, swaps the
   IP source and destination addresses, sets the TTL from the host
   configuration, assigns a fresh fragment id and updates both checksums
   incrementally.  Returns the number of vectors in the frame. */
223 ip4_icmp_echo_request (vlib_main_t * vm,
224 vlib_node_runtime_t * node,
225 vlib_frame_t * frame)
227 uword n_packets = frame->n_vectors;
228 u32 * from, * to_next;
229 u32 n_left_from, n_left_to_next, next;
230 ip4_main_t * i4m = &ip4_main;
231 u16 * fragment_ids, * fid;
/* TTL written into every reply; read once from ip4_main's host_config. */
232 u8 host_config_ttl = i4m->host_config.ttl;
234 from = vlib_frame_vector_args (frame);
235 n_left_from = n_packets;
236 next = node->cached_next_index;
/* Capture trace records for the frame only when tracing is enabled. */
238 if (node->flags & VLIB_NODE_FLAG_TRACE)
239 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
241 sizeof (icmp_input_trace_t));
/* One u16 of random data is requested per packet in the frame; fid walks
   through that array as replies are built. */
243 /* Get random fragment IDs for replies. */
244 fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
245 n_packets * sizeof (fragment_ids[0]));
247 while (n_left_from > 0)
249 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two packets (p0, p1) are rewritten per iteration while more
   than two remain on both the input and the output side. */
251 while (n_left_from > 2 && n_left_to_next > 2)
253 vlib_buffer_t * p0, * p1;
254 ip4_header_t * ip0, * ip1;
255 icmp46_header_t * icmp0, * icmp1;
258 ip_csum_t sum0, sum1;
260 bi0 = to_next[0] = from[0];
261 bi1 = to_next[1] = from[1];
268 p0 = vlib_get_buffer (vm, bi0);
269 p1 = vlib_get_buffer (vm, bi1);
270 ip0 = vlib_buffer_get_current (p0);
271 ip1 = vlib_buffer_get_current (p1);
272 icmp0 = ip4_next_header (ip0);
273 icmp1 = ip4_next_header (ip1);
/* The RX interface of each reply is overwritten with the local interface. */
275 vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
276 vnet_buffer (p1)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
278 /* Update ICMP checksum. */
279 sum0 = icmp0->checksum;
280 sum1 = icmp1->checksum;
/* Only the type field changes (echo request -> echo reply), so the existing
   checksum is adjusted for that one field instead of being recomputed. */
282 ASSERT (icmp0->type == ICMP4_echo_request);
283 ASSERT (icmp1->type == ICMP4_echo_request);
284 sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
285 icmp46_header_t, type);
286 sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
287 icmp46_header_t, type);
288 icmp0->type = ICMP4_echo_reply;
289 icmp1->type = ICMP4_echo_reply;
291 icmp0->checksum = ip_csum_fold (sum0);
292 icmp1->checksum = ip_csum_fold (sum1);
294 src0 = ip0->src_address.data_u32;
295 src1 = ip1->src_address.data_u32;
296 dst0 = ip0->dst_address.data_u32;
297 dst1 = ip1->dst_address.data_u32;
299 /* Swap source and destination address.
300 Does not change checksum. */
301 ip0->src_address.data_u32 = dst0;
302 ip1->src_address.data_u32 = dst1;
303 ip0->dst_address.data_u32 = src0;
304 ip1->dst_address.data_u32 = src1;
306 /* Update IP checksum. */
307 sum0 = ip0->checksum;
308 sum1 = ip1->checksum;
/* The checksum is adjusted for the TTL change before the new TTL is stored,
   because the update needs the old field value. */
310 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
312 sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
314 ip0->ttl = host_config_ttl;
315 ip1->ttl = host_config_ttl;
317 /* New fragment id. */
318 sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
319 ip4_header_t, fragment_id);
320 sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
321 ip4_header_t, fragment_id);
322 ip0->fragment_id = fid[0];
323 ip1->fragment_id = fid[1];
326 ip0->checksum = ip_csum_fold (sum0);
327 ip1->checksum = ip_csum_fold (sum1);
/* Cross-check the incremental result against a full header checksum. */
329 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
330 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
332 p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
333 p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
/* Single loop: same rewrite as above, one packet at a time, for whatever the
   dual loop left over. */
336 while (n_left_from > 0 && n_left_to_next > 0)
340 icmp46_header_t * icmp0;
344 bi0 = to_next[0] = from[0];
351 p0 = vlib_get_buffer (vm, bi0);
352 ip0 = vlib_buffer_get_current (p0);
353 icmp0 = ip4_next_header (ip0);
355 vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
357 /* Update ICMP checksum. */
358 sum0 = icmp0->checksum;
360 ASSERT (icmp0->type == ICMP4_echo_request);
361 sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
362 icmp46_header_t, type);
363 icmp0->type = ICMP4_echo_reply;
364 icmp0->checksum = ip_csum_fold (sum0);
/* Swap source and destination address. */
366 src0 = ip0->src_address.data_u32;
367 dst0 = ip0->dst_address.data_u32;
368 ip0->src_address.data_u32 = dst0;
369 ip0->dst_address.data_u32 = src0;
371 /* Update IP checksum. */
372 sum0 = ip0->checksum;
374 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
376 ip0->ttl = host_config_ttl;
/* New fragment id, taken from the random array. */
378 sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
379 ip4_header_t, fragment_id);
380 ip0->fragment_id = fid[0];
383 ip0->checksum = ip_csum_fold (sum0);
385 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
387 p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
390 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* The echo-replies-sent counter is charged to the ip4-icmp-input node's
   index, not to this node. */
393 vlib_error_count (vm, ip4_icmp_input_node.index,
394 ICMP4_ERROR_ECHO_REPLIES_SENT,
397 return frame->n_vectors;
/* Registration of the "ip4-icmp-echo-request" graph node.  Its next index 0
   is "ip4-load-balance". */
400 VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
401 .function = ip4_icmp_echo_request,
402 .name = "ip4-icmp-echo-request",
404 .vector_size = sizeof (u32),
406 .format_trace = format_icmp_input_trace,
410 [0] = "ip4-load-balance",
/* Next-node indices of ip4-icmp-error.  DROP maps to "error-drop" and LOOKUP
   to "ip4-lookup" in the node registration; N_NEXT is the number of nexts. */
415 IP4_ICMP_ERROR_NEXT_DROP,
416 IP4_ICMP_ERROR_NEXT_LOOKUP,
417 IP4_ICMP_ERROR_N_NEXT,
418 } ip4_icmp_error_next_t;
/* Stores the ICMP type, code and 32-bit data word in buffer b's vnet
   metadata.  ip4_icmp_error reads exactly these three fields when it builds
   the ICMP header. */
421 icmp4_error_set_vnet_buffer (vlib_buffer_t *b, u8 type, u8 code, u32 data)
423 vnet_buffer(b)->ip.icmp.type = type;
424 vnet_buffer(b)->ip.icmp.code = code;
425 vnet_buffer(b)->ip.icmp.data = data;
/* Maps an ICMP message type to the "sent" error counter index that
   ip4_icmp_error charges for it. */
429 icmp4_icmp_type_to_error (u8 type)
432 case ICMP4_destination_unreachable:
433 return ICMP4_ERROR_DEST_UNREACH_SENT;
434 case ICMP4_time_exceeded:
435 return ICMP4_ERROR_TTL_EXPIRE_SENT;
436 case ICMP4_parameter_problem:
437 return ICMP4_ERROR_PARAM_PROBLEM_SENT;
/* Fallback counter.
   NOTE(review): presumably the default case for every other type. */
439 return ICMP4_ERROR_DROP;
/* Node function of ip4-icmp-error: turns each buffer into an ICMPv4 error
   message about the packet it carries.  A new IPv4 header and an ICMP header
   with a 4 byte data field are prepended; the ICMP type, code and data come
   from the buffer's vnet metadata (see icmp4_error_set_vnet_buffer).
   Returns the number of vectors in the frame. */
444 ip4_icmp_error (vlib_main_t * vm,
445 vlib_node_runtime_t * node,
446 vlib_frame_t * frame)
448 u32 * from, * to_next;
449 uword n_left_from, n_left_to_next;
450 ip4_icmp_error_next_t next_index;
451 ip4_main_t *im = &ip4_main;
452 ip_lookup_main_t * lm = &im->lookup_main;
454 from = vlib_frame_vector_args(frame);
455 n_left_from = frame->n_vectors;
456 next_index = node->cached_next_index;
/* Capture trace records for the frame only when tracing is enabled. */
458 if (node->flags & VLIB_NODE_FLAG_TRACE)
459 vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
460 /* stride */ 1, sizeof (icmp_input_trace_t));
462 while (n_left_from > 0) {
463 vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
465 while (n_left_from > 0 && n_left_to_next > 0) {
/* Defaults: forward the generated message to ip4-lookup, no error recorded
   yet. */
467 u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
468 u8 error0 = ICMP4_ERROR_NONE;
470 ip4_header_t * ip0, * out_ip0;
471 icmp46_header_t * icmp0;
472 u32 sw_if_index0, if_add_index0;
475 /* Speculatively enqueue p0 to the current next frame */
482 p0 = vlib_get_buffer(vm, pi0);
/* ip0 is the header of the offending packet; the RX interface is remembered
   to choose the source address of the reply further down. */
483 ip0 = vlib_buffer_get_current(p0);
484 sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX];
487 * RFC1812 says to keep as much of the original packet as
488 * possible within the minimum MTU (576). We cheat "a little"
489 * here by keeping whatever fits in the first buffer, to be more
/* Chained buffer: only the first buffer's data is kept. */
492 if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) {
493 /* clear current_length of all other buffers in chain */
494 vlib_buffer_t *b = p0;
495 p0->total_length_not_including_first_buffer = 0;
496 while (b->flags & VLIB_BUFFER_NEXT_PRESENT) {
497 b = vlib_get_buffer (vm, b->next_buffer);
498 b->current_length = 0;
/* Clamp the retained original data to at most 576 bytes; this happens before
   the new headers are prepended. */
501 p0->current_length = p0->current_length > 576 ? 576 : p0->current_length;
503 /* Add IP header and ICMPv4 header including a 4 byte data field */
504 vlib_buffer_advance(p0,
505 -sizeof(ip4_header_t)-sizeof(icmp46_header_t)-4);
/* The new IP header sits at the new start of data, and the ICMP header
   immediately follows it. */
506 out_ip0 = vlib_buffer_get_current(p0);
507 icmp0 = (icmp46_header_t *) &out_ip0[1];
509 /* Fill ip header fields */
510 out_ip0->ip_version_and_header_length = 0x45;
512 out_ip0->length = clib_host_to_net_u16(p0->current_length);
513 out_ip0->fragment_id = 0;
514 out_ip0->flags_and_fragment_offset = 0;
516 out_ip0->protocol = IP_PROTOCOL_ICMP;
/* The error message is addressed to the sender of the offending packet. */
517 out_ip0->dst_address = ip0->src_address;
/* Source address: looked up through the RX interface's entry in the
   per-interface address pool index vector; ~0 there means no address. */
519 if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
522 lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
523 if (PREDICT_TRUE(if_add_index0 != ~0)) {
524 ip_interface_address_t *if_add =
525 pool_elt_at_index(lm->if_address_pool, if_add_index0);
526 ip4_address_t *if_ip =
527 ip_interface_address_get_address(lm, if_add);
528 out_ip0->src_address = *if_ip;
530 /* interface has no IP4 address - should not happen */
531 next0 = IP4_ICMP_ERROR_NEXT_DROP;
532 error0 = ICMP4_ERROR_DROP;
534 out_ip0->checksum = ip4_header_checksum(out_ip0);
536 /* Fill icmp header fields */
537 icmp0->type = vnet_buffer(p0)->ip.icmp.type;
538 icmp0->code = vnet_buffer(p0)->ip.icmp.code;
/* The 4 byte data field directly after the ICMP header is stored in network
   byte order. */
539 *((u32 *)(icmp0 + 1)) = clib_host_to_net_u32(vnet_buffer(p0)->ip.icmp.data);
/* The ICMP checksum covers everything after the new IP header. */
541 sum = ip_incremental_checksum(0, icmp0, p0->current_length - sizeof(ip4_header_t));
542 icmp0->checksum = ~ip_csum_fold(sum);
544 /* Update error status */
/* When nothing failed above, the counter is chosen from the ICMP type that
   was sent. */
545 if (error0 == ICMP4_ERROR_NONE)
546 error0 = icmp4_icmp_type_to_error(icmp0->type);
547 vlib_error_count(vm, node->node_index, error0, 1);
549 /* Verify speculative enqueue, maybe switch current next frame */
550 vlib_validate_buffer_enqueue_x1(vm, node, next_index,
551 to_next, n_left_to_next,
554 vlib_put_next_frame(vm, node, next_index, n_left_to_next);
557 return frame->n_vectors;
/* Registration of the "ip4-icmp-error" graph node.  It shares
   icmp_error_strings with ip4-icmp-input and has two nexts: "error-drop" and
   "ip4-lookup". */
560 VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
561 .function = ip4_icmp_error,
562 .name = "ip4-icmp-error",
563 .vector_size = sizeof (u32),
565 .n_errors = ARRAY_LEN (icmp_error_strings),
566 .error_strings = icmp_error_strings,
568 .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
570 [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop",
571 [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
574 .format_trace = format_icmp_input_trace,
/* Unformat function: parses an ICMP name from input into the type and code
   fields of the icmp46_header_t passed as the single va_list argument. */
578 static uword unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
580 icmp46_header_t * h = va_arg (*args, icmp46_header_t *);
581 icmp4_main_t * cm = &icmp4_main;
/* First attempt: a name from the combined table, whose value carries the
   type in bits 15..8 and the code in bits 7..0. */
584 if (unformat_user (input, unformat_vlib_number_by_name,
585 cm->type_and_code_by_name, &i))
587 h->type = (i >> 8) & 0xff;
588 h->code = (i >> 0) & 0xff;
/* Second attempt: a name from the type-only table. */
590 else if (unformat_user (input, unformat_vlib_number_by_name,
591 cm->type_by_name, &i))
/* Packet-generator edit function: computes the ICMP checksum of each
   generated packet and stores it in the packet.  unformat_pg_icmp_header
   installs it on the ICMP edit group when no checksum was given. */
603 icmp4_pg_edit_function (pg_main_t * pg,
609 vlib_main_t * vm = vlib_get_main();
610 u32 ip_offset, icmp_offset;
/* g is the ICMP edit group; the group right before it, (g-1), supplies the
   byte offset that is used to locate the IP header. */
612 icmp_offset = g->start_byte_offset;
613 ip_offset = (g-1)->start_byte_offset;
615 while (n_packets >= 1)
619 icmp46_header_t * icmp0;
622 p0 = vlib_get_buffer (vm, packets[0]);
/* Both offsets are applied to p0->data directly, which is only correct while
   current_data is 0. */
626 ASSERT (p0->current_data == 0);
627 ip0 = (void *) (p0->data + ip_offset);
628 icmp0 = (void *) (p0->data + icmp_offset);
/* ICMP length = IP total length (converted to host order) minus the IP
   header length. */
629 len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
630 icmp0->checksum = ~ ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
/* Packet-generator edit descriptors for the ICMP header; type and code are
   set as fixed edits by unformat_pg_icmp_header. */
635 pg_edit_t type, code;
637 } pg_icmp46_header_t;
/* Initializes the edit descriptors in p; called by unformat_pg_icmp_header
   right after the edit group is created. */
640 pg_icmp_header_init (pg_icmp46_header_t * p)
642 /* Initialize each edit from the icmp46_header_t field of the same name. */
643 #define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
/* Packet-generator parser for an ICMP header; installed as the protocol's
   unformat_pg_edit callback by icmp4_init.  The single va_list argument is
   the pg_stream_t the edits are added to. */
651 unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
653 pg_stream_t * s = va_arg (*args, pg_stream_t *);
654 pg_icmp46_header_t * p;
/* A new edit group sized for one icmp46_header_t is created on the stream. */
657 p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
659 pg_icmp_header_init (p);
/* The checksum starts out unspecified; it stays that way unless the input
   supplies a "checksum" value below. */
661 p->checksum.type = PG_EDIT_UNSPECIFIED;
/* The text must start with "ICMP" followed by a type/code name. */
666 if (! unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
669 pg_edit_set_fixed (&p->type, tmp.type);
670 pg_edit_set_fixed (&p->code, tmp.code);
/* Optional explicit checksum. */
676 if (unformat (input, "checksum %U",
678 unformat_pg_number, &p->checksum))
681 /* Can't parse input: try next protocol level. */
686 if (! unformat_user (input, unformat_pg_payload, s))
/* No checksum given: install the edit function that computes it for each
   generated packet. */
689 if (p->checksum.type == PG_EDIT_UNSPECIFIED)
691 pg_edit_group_t * g = pg_stream_get_group (s, group_index);
692 g->edit_function = icmp4_pg_edit_function;
693 g->edit_function_opaque = 0;
699 /* Free up any edits we may have added. */
700 pg_free_edit_group (s);
/* Registers a handler node for one ICMP type: node_index is added as a next
   of ip4-icmp-input, and the resulting next index is stored in the per-type
   dispatch table that ip4_icmp_input consults. */
704 void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
707 icmp4_main_t * im = &icmp4_main;
/* The type is used as an index into the dispatch table and must be within
   its bounds. */
709 ASSERT ((int)type < ARRAY_LEN (im->ip4_input_next_index_by_type));
710 im->ip4_input_next_index_by_type[type]
711 = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
/* Init function for ICMPv4.  After ip_main_init has run it installs the ICMP
   header formatter and packet-generator parser on the IP_PROTOCOL_ICMP
   protocol info, creates the two name hashes in icmp4_main, points every
   entry of the per-type dispatch table at ICMP_INPUT_NEXT_ERROR and finally
   registers the echo-request node as handler for ICMP4_echo_request. */
714 static clib_error_t *
715 icmp4_init (vlib_main_t * vm)
717 ip_main_t * im = &ip_main;
718 ip_protocol_info_t * pi;
719 icmp4_main_t * cm = &icmp4_main;
720 clib_error_t * error;
/* The protocol info table queried below belongs to ip_main, so that init
   function is called first. */
722 error = vlib_call_init_function (vm, ip_main_init);
727 pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
728 pi->format_header = format_ip4_icmp_header;
729 pi->unformat_pg_edit = unformat_pg_icmp_header;
/* type_by_name: type name -> type value. */
731 cm->type_by_name = hash_create_string (0, sizeof (uword));
732 #define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
/* type_and_code_by_name: name -> (type << 8) | code.  The entries must go
   into this hash, not into type_by_name: unformat_icmp_type_and_code looks
   combined names up here and splits the value into type and code. */
736 cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
737 #define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP4_##a << 8));
/* Until a handler registers, every ICMP type is dispatched to the error
   next. */
741 memset (cm->ip4_input_next_index_by_type,
742 ICMP_INPUT_NEXT_ERROR,
743 sizeof (cm->ip4_input_next_index_by_type));
745 ip4_icmp_register_type (vm, ICMP4_echo_request, ip4_icmp_echo_request_node.index);
750 VLIB_INIT_FUNCTION (icmp4_init);