/*---------------------------------------------------------------------------
 * Copyright (c) 2009-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------
 */
/*
 * IPv4 Fragmentation Node
 */
#include "ip_frag.h"

#include <vnet/ip/ip.h>
typedef struct
{
  u8 ipv6;
  u16 header_offset;
  u16 mtu;
  u8 next;
  u16 n_fragments;
} ip_frag_trace_t;

static u8 *
format_ip_frag_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
  s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
              t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
  return s;
}

static u32 running_fragment_id;

/*
 * Limitation: Does follow buffer chains in the packet to fragment,
 * but does not generate buffer chains. I.e. a fragment is always
 * contained within a single buffer and limited to the max buffer
 * size.
 */
void
ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
                      ip_frag_error_t * error)
{
  vlib_buffer_t *from_b;
  ip4_header_t *ip4;
  u16 mtu, len, max, rem, offset, ip_frag_id, ip_frag_offset;
  u8 *org_from_packet, more;

  from_b = vlib_get_buffer (vm, from_bi);
  offset = vnet_buffer (from_b)->ip_frag.header_offset;
  mtu = vnet_buffer (from_b)->ip_frag.mtu;
  org_from_packet = vlib_buffer_get_current (from_b);
  ip4 = vlib_buffer_get_current (from_b) + offset;	/* offset is in bytes */

  rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
  max =
    (mtu - sizeof (ip4_header_t) -
     vnet_buffer (from_b)->ip_frag.header_offset) & ~0x7;
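  /*
   * IPv4 fragment offsets are expressed in 8-octet units (RFC 791), so
   * every non-final fragment must carry a payload that is a multiple of
   * 8 bytes; hence the & ~0x7 above. E.g. with mtu = 1500 and no encap
   * (header_offset = 0): max = (1500 - 20) & ~0x7 = 1480.
   */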
  if (rem >
      (vlib_buffer_length_in_chain (vm, from_b) - offset -
       sizeof (ip4_header_t)))
    {
      *error = IP_FRAG_ERROR_MALFORMED;
      vec_add1 (*buffer, from_bi);	/* let the caller drop it */
      return;
    }

  if (mtu < sizeof (ip4_header_t))
    {
      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
      vec_add1 (*buffer, from_bi);
      return;
    }

  if (ip4->flags_and_fragment_offset &
      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
    {
      *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
      /* Hand the original packet back so the node can steer it to the
         icmp-error path */
      vec_add1 (*buffer, from_bi);
      return;
    }

  if (ip4_is_fragment (ip4))
    {
      ip_frag_id = ip4->fragment_id;
      ip_frag_offset = ip4_get_fragment_offset (ip4);
      more =
        !!(ip4->flags_and_fragment_offset &
           clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
    }
  else
    {
      ip_frag_id = (++running_fragment_id);
      ip_frag_offset = 0;
      more = 0;
    }

  u8 *from_data = (void *) (ip4 + 1);
  vlib_buffer_t *org_from_b = from_b;
  u16 ptr = 0, fo = 0;
  u16 left_in_from_buffer =
    from_b->current_length - offset - sizeof (ip4_header_t);
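  /*
   * Copy-loop state: 'ptr' is the read offset into the current source
   * buffer's payload, 'fo' is the running fragment offset in bytes within
   * the original datagram, and 'left_in_from_buffer' is how many payload
   * bytes remain in the current link of the source buffer chain.
   */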

  /* Do the actual fragmentation */
  while (rem)
    {
      u32 to_bi;
      vlib_buffer_t *to_b;
      ip4_header_t *to_ip4;
      u8 *to_data;

      len = (rem > (mtu - sizeof (ip4_header_t) - offset) ? max : rem);
      if (len != rem)		/* Last fragment does not need to be divisible by 8 */
        len &= ~0x7;

      if (!vlib_buffer_alloc (vm, &to_bi, 1))
        {
          *error = IP_FRAG_ERROR_MEMORY;
          /* XXX: Free already allocated buffers? */
          return;
        }
      vec_add1 (*buffer, to_bi);
      to_b = vlib_get_buffer (vm, to_bi);
      vnet_buffer (to_b)->sw_if_index[VLIB_RX] =
        vnet_buffer (org_from_b)->sw_if_index[VLIB_RX];
      vnet_buffer (to_b)->sw_if_index[VLIB_TX] =
        vnet_buffer (org_from_b)->sw_if_index[VLIB_TX];
      /* Copy adj_index in case DPO based node is sending for the
       * fragmentation, the packet would be sent back to the proper
       * DPO next node and Index
       */
      vnet_buffer (to_b)->ip.adj_index[VLIB_RX] =
        vnet_buffer (org_from_b)->ip.adj_index[VLIB_RX];
      vnet_buffer (to_b)->ip.adj_index[VLIB_TX] =
        vnet_buffer (org_from_b)->ip.adj_index[VLIB_TX];

      /* Copy offset and ip4 header */
      clib_memcpy (to_b->data, org_from_packet,
                   offset + sizeof (ip4_header_t));
      to_ip4 = vlib_buffer_get_current (to_b) + offset;
      to_data = (void *) (to_ip4 + 1);

      /* Spin through buffer chain copying data */
      // XXX: Make sure we don't overflow source buffer!!!
      if (len > left_in_from_buffer)
        {
          clib_memcpy (to_data, from_data + ptr, left_in_from_buffer);

          /* Move on to the next buffer in the source chain */
          if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
            {
              *error = IP_FRAG_ERROR_MALFORMED;
              return;
            }
          from_b = vlib_get_buffer (vm, from_b->next_buffer);
          from_data = (u8 *) vlib_buffer_get_current (from_b);
          clib_memcpy (to_data + left_in_from_buffer, from_data,
                       len - left_in_from_buffer);
          ptr = len - left_in_from_buffer;
          left_in_from_buffer =
            from_b->current_length - (len - left_in_from_buffer);
        }
      else
        {
          clib_memcpy (to_data, from_data + ptr, len);
          left_in_from_buffer -= len;
          ptr += len;
        }
      to_b->current_length = offset + len + sizeof (ip4_header_t);

      to_ip4->fragment_id = ip_frag_id;
      to_ip4->flags_and_fragment_offset =
        clib_host_to_net_u16 ((fo >> 3) + ip_frag_offset);
      to_ip4->flags_and_fragment_offset |=
        clib_host_to_net_u16 (((len != rem) || more) << 13);
      to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t));
      to_ip4->checksum = ip4_header_checksum (to_ip4);
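      /*
       * The two stores above pack the 13-bit fragment offset (in 8-octet
       * units, hence fo >> 3) together with the flag bits: bit 13 is
       * More Fragments, set on every fragment except the last, or
       * unconditionally when the original packet was itself a non-final
       * fragment ('more').
       */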

      if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
        {
          /* Encapsulating ipv4 header */
          ip4_header_t *encap_header4 =
            (ip4_header_t *) vlib_buffer_get_current (to_b);
          encap_header4->length = clib_host_to_net_u16 (to_b->current_length);
          encap_header4->checksum = ip4_header_checksum (encap_header4);
        }
      else if (vnet_buffer (org_from_b)->
               ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
        {
          /* Encapsulating ipv6 header */
          ip6_header_t *encap_header6 =
            (ip6_header_t *) vlib_buffer_get_current (to_b);
          encap_header6->payload_length =
            clib_host_to_net_u16 (to_b->current_length -
                                  sizeof (*encap_header6));
        }

      rem -= len;
      fo += len;
    }

  /* Free original packet chain */
  vlib_buffer_free_one (vm, from_bi);
}
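
/*
 * On success the original buffer chain has been freed and *buffer holds
 * the indices of the newly allocated fragment buffers; on error *error is
 * set and any buffers already appended to *buffer are left for the caller
 * to dispose of.
 */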

void
ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
                         u8 next_index, u8 flags)
{
  vnet_buffer (b)->ip_frag.header_offset = offset;
  vnet_buffer (b)->ip_frag.mtu = mtu;
  vnet_buffer (b)->ip_frag.next_index = next_index;
  vnet_buffer (b)->ip_frag.flags = flags;
}
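
/*
 * Hypothetical caller sketch (illustrative only, not part of this file):
 * a feature node that decides a packet exceeds the egress MTU fills in
 * the ip_frag opaque metadata before handing the buffer to "ip4-frag",
 * for example:
 *
 *   ip_frag_set_vnet_buffer (b0, 0, mtu, IP4_FRAG_NEXT_IP4_LOOKUP, 0);
 *
 * where header_offset (0 here) is non-zero only when an encapsulating
 * IPv4/IPv6 header precedes the header being fragmented (see
 * IP_FRAG_FLAG_IP4_HEADER / IP_FRAG_FLAG_IP6_HEADER).
 */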

static uword
ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_frag_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0, *frag_from, frag_left;
          vlib_buffer_t *p0;
          ip_frag_error_t error0;
          ip4_frag_next_t next0;

          //Note: The packet is not enqueued now.
          //It is instead put in a vector where other fragments
          //will be put as well.
          pi0 = from[0];
          from += 1;
          n_left_from -= 1;
          error0 = IP_FRAG_ERROR_NONE;

          p0 = vlib_get_buffer (vm, pi0);
          ip4_frag_do_fragment (vm, pi0, &buffer, &error0);

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              ip_frag_trace_t *tr =
                vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
              tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
              tr->ipv6 = 0;
              tr->n_fragments = vec_len (buffer);
              tr->next = vnet_buffer (p0)->ip_frag.next_index;
            }

          if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
            {
              icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
                                           ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
                                           vnet_buffer (p0)->ip_frag.mtu);
              vlib_buffer_advance (p0,
                                   vnet_buffer (p0)->ip_frag.header_offset);
              next0 = IP4_FRAG_NEXT_ICMP_ERROR;
            }
          else
            next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
              ip_frag.next_index : IP4_FRAG_NEXT_DROP;

          if (error0 == IP_FRAG_ERROR_NONE)
            {
              frag_sent += vec_len (buffer);
              small_packets += (vec_len (buffer) == 1);
            }
          else
            vlib_error_count (vm, ip4_frag_node.index, error0, 1);

          //Send fragments that were added in the frame
          frag_from = buffer;
          frag_left = vec_len (buffer);
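          /*
           * A single packet can fragment into more buffers than there is
           * room left in the current output frame, so the loop below fills
           * the frame, flushes it with vlib_put_next_frame() and fetches a
           * fresh one until every fragment has been enqueued.
           */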
          while (frag_left > 0)
            {
              while (frag_left > 0 && n_left_to_next > 0)
                {
                  u32 i;
                  i = to_next[0] = frag_from[0];
                  frag_from += 1;
                  frag_left -= 1;
                  to_next += 1;
                  n_left_to_next -= 1;

                  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next, i,
                                                   next0);
                }
              vlib_put_next_frame (vm, node, next_index, n_left_to_next);
              vlib_get_next_frame (vm, node, next_index, to_next,
                                   n_left_to_next);
            }
          vec_reset_length (buffer);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vec_free (buffer);

  vlib_node_increment_counter (vm, ip4_frag_node.index,
                               IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter (vm, ip4_frag_node.index,
                               IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}

void
ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
                      ip_frag_error_t * error)
{
  vlib_buffer_t *p;
  ip6_header_t *ip6_hdr;
  ip6_frag_hdr_t *frag_hdr;
  u8 *payload, *next_header;

  p = vlib_get_buffer (vm, pi);

  //Parsing the IPv6 headers
  ip6_hdr =
    vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
  payload = (u8 *) (ip6_hdr + 1);
  next_header = &ip6_hdr->protocol;
  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
    {
      next_header = payload;
      payload += (payload[1] + 1) * 8;
    }

  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
    {
      next_header = payload;
      payload += (payload[1] + 1) * 8;
    }

  if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
    {
      next_header = payload;
      payload += (payload[1] + 1) * 8;
    }
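
  /*
   * Only the extension headers that may legitimately precede the Fragment
   * header (Hop-by-Hop, Destination Options, Routing) are skipped above.
   * Their second byte is the Hdr Ext Len field: the header length in
   * 8-octet units, not counting the first 8 octets, hence the
   * (payload[1] + 1) * 8 stride.
   */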
  if (PREDICT_FALSE
      (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
    {
      //A malicious packet could set an extension header with a too big size
      //and make us modify another vlib_buffer
      *error = IP_FRAG_ERROR_MALFORMED;
      vec_add1 (*buffer, pi);	/* let the caller drop it */
      return;
    }

  if (p->flags & VLIB_BUFFER_NEXT_PRESENT)
    {
      //Buffer chains are not supported on this path
      *error = IP_FRAG_ERROR_MALFORMED;
      vec_add1 (*buffer, pi);
      return;
    }

  u8 has_more = 0;
  u16 initial_offset = 0;
  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
    {
      //The fragmentation header is already there
      frag_hdr = (ip6_frag_hdr_t *) payload;
      has_more = ip6_frag_hdr_more (frag_hdr);
      initial_offset = ip6_frag_hdr_offset (frag_hdr);
    }
  else
    {
      //Insert a fragmentation header in the packet
      u8 nh = *next_header;
      *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
      vlib_buffer_advance (p, -sizeof (*frag_hdr));
      u8 *start = vlib_buffer_get_current (p);
      memmove (start, start + sizeof (*frag_hdr),
               payload - (start + sizeof (*frag_hdr)));
      frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
      frag_hdr->identification = ++running_fragment_id;
      frag_hdr->next_hdr = nh;
      frag_hdr->rsv = 0;
    }
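  /*
   * Note on the insertion above: rather than moving the (potentially
   * large) payload forward, the packet start is advanced 8 bytes into the
   * buffer's pre-data area and the already-parsed headers are shifted
   * down by sizeof (ip6_frag_hdr_t), leaving an 8-byte hole exactly where
   * the Fragment header belongs.
   */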
  payload = (u8 *) (frag_hdr + 1);

  u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
  u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
  u16 rem = p->current_length - headers_len;
  u16 ptr = 0;

  if (max_payload < 8)
    {
      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
      vec_add1 (*buffer, pi);
      return;
    }

  while (rem)
    {
      u32 bi;
      vlib_buffer_t *b;
      u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
      rem -= len;
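      /*
       * As in the IPv4 path, every fragment except the last must carry a
       * multiple of 8 octets, since the fragment offset field counts in
       * 8-octet units; the final fragment takes whatever remains.
       */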

      if (ptr != 0)
        {
          //Not the first fragment: copy the headers into a fresh buffer
          if (!vlib_buffer_alloc (vm, &bi, 1))
            {
              *error = IP_FRAG_ERROR_MEMORY;
              return;
            }
          b = vlib_get_buffer (vm, bi);
          vnet_buffer (b)->sw_if_index[VLIB_RX] =
            vnet_buffer (p)->sw_if_index[VLIB_RX];
          vnet_buffer (b)->sw_if_index[VLIB_TX] =
            vnet_buffer (p)->sw_if_index[VLIB_TX];

          /* Copy Adj_index in case DPO based node is sending for the fragmentation,
             the packet would be sent back to the proper DPO next node and Index */
          vnet_buffer (b)->ip.adj_index[VLIB_RX] =
            vnet_buffer (p)->ip.adj_index[VLIB_RX];
          vnet_buffer (b)->ip.adj_index[VLIB_TX] =
            vnet_buffer (p)->ip.adj_index[VLIB_TX];

          clib_memcpy (vlib_buffer_get_current (b),
                       vlib_buffer_get_current (p), headers_len);
          clib_memcpy (vlib_buffer_get_current (b) + headers_len,
                       payload + ptr, len);
          frag_hdr =
            vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
        }
      else
        {
          //First fragment: re-use the original buffer
          bi = pi;
          b = vlib_get_buffer (vm, bi);
          //frag_hdr already set here
        }

      ip6_hdr =
        vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
      frag_hdr->fragment_offset_and_more =
        ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
                                      (rem || has_more));
      b->current_length = headers_len + len;
      ip6_hdr->payload_length =
        clib_host_to_net_u16 (b->current_length -
                              vnet_buffer (p)->ip_frag.header_offset -
                              sizeof (*ip6_hdr));

      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
        {
          //Encapsulating ipv4 header
          ip4_header_t *encap_header4 =
            (ip4_header_t *) vlib_buffer_get_current (b);
          encap_header4->length = clib_host_to_net_u16 (b->current_length);
          encap_header4->checksum = ip4_header_checksum (encap_header4);
        }
      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
        {
          //Encapsulating ipv6 header
          ip6_header_t *encap_header6 =
            (ip6_header_t *) vlib_buffer_get_current (b);
          encap_header6->payload_length =
            clib_host_to_net_u16 (b->current_length -
                                  sizeof (*encap_header6));
        }

      ptr += len;
      vec_add1 (*buffer, bi);
    }
}
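
/*
 * Unlike the IPv4 path, which always allocates fresh buffers and frees
 * the original chain, the first IPv6 fragment re-uses the original buffer
 * (bi == pi), so nothing is freed here.
 */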

static uword
ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_frag_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0, *frag_from, frag_left;
          vlib_buffer_t *p0;
          ip_frag_error_t error0;
          ip6_frag_next_t next0;

          pi0 = from[0];
          from += 1;
          n_left_from -= 1;
          error0 = IP_FRAG_ERROR_NONE;

          p0 = vlib_get_buffer (vm, pi0);
          ip6_frag_do_fragment (vm, pi0, &buffer, &error0);

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              ip_frag_trace_t *tr =
                vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
              tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
              tr->ipv6 = 1;
              tr->n_fragments = vec_len (buffer);
              tr->next = vnet_buffer (p0)->ip_frag.next_index;
            }

          next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
            ip_frag.next_index : IP6_FRAG_NEXT_DROP;

          if (error0 == IP_FRAG_ERROR_NONE)
            {
              frag_sent += vec_len (buffer);
              small_packets += (vec_len (buffer) == 1);
            }

          //Send fragments that were added in the frame
          frag_from = buffer;
          frag_left = vec_len (buffer);
          while (frag_left > 0)
            {
              while (frag_left > 0 && n_left_to_next > 0)
                {
                  u32 i;
                  i = to_next[0] = frag_from[0];
                  frag_from += 1;
                  frag_left -= 1;
                  to_next += 1;
                  n_left_to_next -= 1;

                  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next, i,
                                                   next0);
                }
              vlib_put_next_frame (vm, node, next_index, n_left_to_next);
              vlib_get_next_frame (vm, node, next_index, to_next,
                                   n_left_to_next);
            }
          vec_reset_length (buffer);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vec_free (buffer);

  vlib_node_increment_counter (vm, ip6_frag_node.index,
                               IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter (vm, ip6_frag_node.index,
                               IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}

static char *ip4_frag_error_strings[] = {
#define _(sym,string) string,
  foreach_ip_frag_error
#undef _
};

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
  .function = ip4_frag,
  .name = IP4_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP4_FRAG_N_NEXT,
  .next_nodes = {
    [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_FRAG_NEXT_DROP] = "ip4-drop"
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
  .function = ip6_frag,
  .name = IP6_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,	/* shared with ip4-frag */

  .n_next_nodes = IP6_FRAG_N_NEXT,
  .next_nodes = {
    [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP6_FRAG_NEXT_DROP] = "ip6-drop"
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */