/*---------------------------------------------------------------------------
 * Copyright (c) 2009-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------
 */

/*
 * IPv4 Fragmentation Node
 */

#include "ip_frag.h"

#include <vnet/ip/ip.h>
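
/*
 * Caller contract, inferred from the fields read below (there may be a setup
 * helper elsewhere, this file does not show one): before a buffer reaches one
 * of these nodes, vnet_buffer(b)->ip_frag.mtu, .header_offset, .next_index
 * and .flags are expected to be set. mtu bounds the size of each fragment,
 * header_offset skips an optional encapsulation header, next_index selects
 * the output node on success, and flags says whether an outer IPv4/IPv6
 * header needs its length/checksum fixed up on every fragment.
 */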
typedef struct {
  u8 ipv6;
  u16 header_offset;
  u16 mtu;
  u8 next;
  u16 n_fragments;
} ip_frag_trace_t;

static u8 * format_ip_frag_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip_frag_trace_t * t = va_arg (*args, ip_frag_trace_t *);
  s = format(s, "IPv%s offset: %u mtu: %u fragments: %u next: %s",
             t->ipv6 ? "6" : "4",
             t->header_offset, t->mtu, t->n_fragments,
             node->next_node_names[t->next]);
  return s;
}
static u32 running_fragment_id;
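
/* The identification counter is shared by the IPv4 and IPv6 paths; each
 * packet that gets freshly fragmented consumes one new identification
 * value, while re-fragmented fragments keep the id they arrived with. */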
static void
ip4_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error)
{
  vlib_buffer_t *p;
  ip4_header_t *ip4;
  u16 mtu, ptr, len, max, rem,
      offset, ip_frag_id, ip_frag_offset;
  u8 *packet, more;

  vec_add1(*buffer, pi);
  p = vlib_get_buffer(vm, pi);
  offset = vnet_buffer(p)->ip_frag.header_offset;
  mtu = vnet_buffer(p)->ip_frag.mtu;
  packet = (u8 *)vlib_buffer_get_current(p);
  ip4 = (ip4_header_t *)(packet + offset);

  rem = clib_net_to_host_u16(ip4->length) - sizeof(*ip4);
  ptr = 0;
  max = (mtu - sizeof(*ip4) - vnet_buffer(p)->ip_frag.header_offset) & ~0x7;
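  // Fragment offsets are expressed in 8-octet units (RFC 791), so every
  // non-final fragment must carry a payload that is a multiple of 8; hence
  // the & ~0x7. Example (illustrative numbers): mtu = 1500 with
  // header_offset = 0 gives max = (1500 - 20) & ~0x7 = 1480 bytes.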
  if (rem < (p->current_length - offset - sizeof(*ip4))) {
    *error = IP_FRAG_ERROR_MALFORMED;
    return;
  }

  if (mtu < sizeof(*ip4)) {
    *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
    return;
  }

  if (ip4->flags_and_fragment_offset &
      clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT)) {
    *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
    return;
  }
  if (ip4_is_fragment(ip4)) {
    //Re-fragmenting a fragment: keep its id and offset, remember its MF bit
    ip_frag_id = ip4->fragment_id;
    ip_frag_offset = ip4_get_fragment_offset(ip4);
    more = !!(ip4->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS));
  } else {
    ip_frag_id = (++running_fragment_id);
    ip_frag_offset = 0;
    more = 0;
  }
  //Do the actual fragmentation
  while (rem) {
    u32 bi;
    vlib_buffer_t *b;
    ip4_header_t *fip4;

    len = (rem > (mtu - sizeof(*ip4) - vnet_buffer(p)->ip_frag.header_offset)) ? max : rem;
    if (ptr == 0) {
      //First fragment: reuse the original buffer
      bi = pi;
      b = p;
      fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset);
    } else {
      //Subsequent fragments: allocate a fresh buffer
      if (!vlib_buffer_alloc(vm, &bi, 1)) {
        *error = IP_FRAG_ERROR_MEMORY;
        return;
      }
      vec_add1(*buffer, bi);
      b = vlib_get_buffer(vm, bi);
      vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX];
      vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX];
      fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset);

      //Copy offset and ip4 header
      memcpy(b->data, packet, offset + sizeof(*ip4));

      //Copy data
      memcpy(((u8*)(fip4)) + sizeof(*fip4),
             packet + offset + sizeof(*fip4) + ptr, len);
    }
    b->current_length = offset + len + sizeof(*fip4);
    fip4->fragment_id = ip_frag_id;
    fip4->flags_and_fragment_offset = clib_host_to_net_u16((ptr >> 3) + ip_frag_offset);
    fip4->flags_and_fragment_offset |= clib_host_to_net_u16(((len != rem) || more) << 13);
    // ((len != rem) || more) << 13 is an optimization for
    // ((len != rem) || more) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
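    // The shift works because IP4_HEADER_FLAG_MORE_FRAGMENTS is bit 13 of
    // flags_and_fragment_offset (0x2000 in host order): shifting the boolean
    // left by 13 yields exactly that flag value without a branch.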
    fip4->length = clib_host_to_net_u16(len + sizeof(*fip4));
    fip4->checksum = ip4_header_checksum(fip4);
    if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) {
      //Encapsulating ipv4 header
      ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b);
      encap_header4->length = clib_host_to_net_u16(b->current_length);
      encap_header4->checksum = ip4_header_checksum(encap_header4);
    } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) {
      //Encapsulating ipv6 header
      ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b);
      encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6));
    }

    rem -= len;
    ptr += len;
  }
}
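
/*
 * Worked example of the arithmetic above (illustrative numbers, not from the
 * code): a 4000-byte IPv4 packet (20-byte header + 3980 payload bytes) with
 * mtu = 1500 and header_offset = 0 gives max = 1480 and splits into three
 * fragments of 1500, 1500 and 1040 bytes, carrying payload offsets 0, 185
 * and 370 (in 8-octet units); MF is set on all but the last fragment.
 */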
static uword
ip4_frag (vlib_main_t *vm,
          vlib_node_runtime_t *node,
          vlib_frame_t *frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t * error_node = vlib_node_get_runtime(vm, ip4_frag_node.index);
  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;
  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u32 pi0, *frag_from, frag_left;
      vlib_buffer_t *p0;
      ip_frag_error_t error0;
      ip4_frag_next_t next0;

      //Note: The packet is not enqueued now.
      //It is instead put in a vector where other fragments
      //will be put as well.
      pi0 = from[0];
      from += 1;
      n_left_from -= 1;
      error0 = IP_FRAG_ERROR_NONE;

      p0 = vlib_get_buffer(vm, pi0);
      ip4_frag_do_fragment(vm, pi0, &buffer, &error0);
      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr));
        tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset;
        tr->mtu = vnet_buffer(p0)->ip_frag.mtu;
        tr->ipv6 = 0;
        tr->n_fragments = vec_len(buffer);
        tr->next = vnet_buffer(p0)->ip_frag.next_index;
      }

      next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP4_FRAG_NEXT_DROP;
      frag_sent += vec_len(buffer);
      small_packets += (vec_len(buffer) == 1);
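      // A one-element vector means no extra fragment buffers were allocated:
      // the packet already fit within the MTU and is counted as "small".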
      //Send fragments that were added in the frame
      frag_from = buffer;
      frag_left = vec_len(buffer);

      while (frag_left > 0) {
        while (frag_left > 0 && n_left_to_next > 0) {
          u32 i;
          i = to_next[0] = frag_from[0];
          frag_from += 1;
          frag_left -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          vlib_get_buffer(vm, i)->error = error_node->errors[error0];
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next, i,
                                           next0);
        }
        vlib_put_next_frame(vm, node, next_index, n_left_to_next);
        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
      }
      vec_reset_length(buffer);
    }
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
  }
  vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}
static void
ip6_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error)
{
  vlib_buffer_t *p;
  ip6_header_t *ip6_hdr;
  ip6_frag_hdr_t *frag_hdr;
  u8 *payload, *next_header;

  p = vlib_get_buffer(vm, pi);
  //Parsing the IPv6 headers
  ip6_hdr = vlib_buffer_get_current(p) + vnet_buffer(p)->ip_frag.header_offset;
  payload = (u8 *)(ip6_hdr + 1);
  next_header = &ip6_hdr->protocol;
  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) {
    next_header = payload;
    //Hdr Ext Len counts 8-octet units beyond the first 8 octets (RFC 2460)
    payload += (payload[1] + 1) * 8;
  }

  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS) {
    next_header = payload;
    payload += (payload[1] + 1) * 8;
  }

  if (*next_header == IP_PROTOCOL_IPV6_ROUTE) {
    next_header = payload;
    payload += (payload[1] + 1) * 8;
  }
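
  // Per RFC 2460, the Fragment header belongs after the Hop-by-Hop,
  // Destination Options and Routing headers, which is why only those three
  // are skipped before locating or inserting it.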
  u8 has_more;
  u16 initial_offset;
  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION) {
    //The fragmentation header is already there
    frag_hdr = (ip6_frag_hdr_t *)payload;
    has_more = ip6_frag_hdr_more(frag_hdr);
    initial_offset = ip6_frag_hdr_offset(frag_hdr);
  } else {
    //Insert a fragmentation header in the packet
    u8 nh = *next_header;
    *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
    vlib_buffer_advance(p, -sizeof(*frag_hdr));
    u8 *start = vlib_buffer_get_current(p);
    //Shift the unfragmentable headers toward the buffer start to open an
    //8-byte gap for the fragment header right before the payload
    memmove(start, start + sizeof(*frag_hdr), payload - (start + sizeof(*frag_hdr)));
    frag_hdr = (ip6_frag_hdr_t *)(payload - sizeof(*frag_hdr));
    frag_hdr->identification = ++running_fragment_id;
    frag_hdr->next_hdr = nh;
    frag_hdr->rsv = 0;
    has_more = 0;
    initial_offset = 0;
  }
  payload = (u8 *)(frag_hdr + 1);
  u16 headers_len = payload - (u8 *)vlib_buffer_get_current(p);
  u16 max_payload = vnet_buffer(p)->ip_frag.mtu - headers_len;
  u16 rem = p->current_length - headers_len;
  u16 ptr = 0;

  if (max_payload < 8) {
    //Each fragment must carry at least one 8-octet unit of payload
    *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
    return;
  }

  while (rem) {
    u32 bi;
    vlib_buffer_t *b;
    u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
    rem -= len;
    if (ptr != 0) {
      //Not the first fragment: allocate a new buffer, copy headers and data
      if (!vlib_buffer_alloc(vm, &bi, 1)) {
        *error = IP_FRAG_ERROR_MEMORY;
        return;
      }
      b = vlib_get_buffer(vm, bi);
      vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX];
      vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX];
      memcpy(vlib_buffer_get_current(b), vlib_buffer_get_current(p), headers_len);
      memcpy(vlib_buffer_get_current(b) + headers_len, payload + ptr, len);
      frag_hdr = vlib_buffer_get_current(b) + headers_len - sizeof(*frag_hdr);
    } else {
      //First fragment: reuse the original buffer
      bi = pi;
      b = vlib_get_buffer(vm, bi);
      //frag_hdr already set here
    }

    ip6_hdr = vlib_buffer_get_current(b) + vnet_buffer(p)->ip_frag.header_offset;
    frag_hdr->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(initial_offset + (ptr >> 3), (rem || has_more));
    b->current_length = headers_len + len;
    ip6_hdr->payload_length = clib_host_to_net_u16(b->current_length - vnet_buffer(p)->ip_frag.header_offset - sizeof(*ip6_hdr));
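    // rem was already decremented for this fragment, so it is non-zero only
    // when more fragments follow; has_more propagates the MF bit of an
    // input packet that was itself already a fragment.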
    if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) {
      //Encapsulating ipv4 header
      ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b);
      encap_header4->length = clib_host_to_net_u16(b->current_length);
      encap_header4->checksum = ip4_header_checksum(encap_header4);
    } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) {
      //Encapsulating ipv6 header
      ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b);
      encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6));
    }

    vec_add1(*buffer, bi);
    ptr += len;
  }
}
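
/*
 * Worked example (illustrative numbers, not from the code): a 1500-byte IPv6
 * packet (40-byte header + 1460 payload bytes) fragmented to mtu = 1280 gets
 * an 8-byte fragment header inserted (headers_len = 48, max_payload = 1232)
 * and splits into a 1280-byte fragment (payload 1232, offset 0, MF set) and
 * a 276-byte fragment (payload 228, offset 154, MF clear).
 */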
static uword
ip6_frag (vlib_main_t * vm,
          vlib_node_runtime_t * node,
          vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_frag_node.index);
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  u32 frag_sent = 0, small_packets = 0;
  u32 *buffer = 0;
  while (n_left_from > 0) {
    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u32 pi0, *frag_from, frag_left;
      vlib_buffer_t * p0;
      ip_frag_error_t error0;
      ip6_frag_next_t next0;

      pi0 = from[0];
      from += 1;
      n_left_from -= 1;
      error0 = IP_FRAG_ERROR_NONE;

      p0 = vlib_get_buffer(vm, pi0);
      ip6_frag_do_fragment(vm, pi0, &buffer, &error0);
      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr));
        tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset;
        tr->mtu = vnet_buffer(p0)->ip_frag.mtu;
        tr->ipv6 = 1;
        tr->n_fragments = vec_len(buffer);
        tr->next = vnet_buffer(p0)->ip_frag.next_index;
      }

      next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP6_FRAG_NEXT_DROP;
      frag_sent += vec_len(buffer);
      small_packets += (vec_len(buffer) == 1);
      //Send fragments that were added in the frame
      frag_from = buffer;
      frag_left = vec_len(buffer);

      while (frag_left > 0) {
        while (frag_left > 0 && n_left_to_next > 0) {
          u32 i;
          i = to_next[0] = frag_from[0];
          frag_from += 1;
          frag_left -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          vlib_get_buffer(vm, i)->error = error_node->errors[error0];
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next, i,
                                           next0);
        }
        vlib_put_next_frame(vm, node, next_index, n_left_to_next);
        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
      }
      vec_reset_length(buffer);
    }
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
  }
  vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
  vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets);

  return frame->n_vectors;
}
static char * ip4_frag_error_strings[] = {
#define _(sym,string) string,
  foreach_ip_frag_error
#undef _
};
VLIB_REGISTER_NODE (ip4_frag_node) = {
  .function = ip4_frag,
  .name = IP4_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings,

  .n_next_nodes = IP4_FRAG_N_NEXT,
  .next_nodes = {
    [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP4_FRAG_NEXT_DROP] = "error-drop"
  },
};
VLIB_REGISTER_NODE (ip6_frag_node) = {
  .function = ip6_frag,
  .name = IP6_FRAG_NODE_NAME,
  .vector_size = sizeof (u32),
  .format_trace = format_ip_frag_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = IP_FRAG_N_ERROR,
  .error_strings = ip4_frag_error_strings, //Error strings are shared with the IPv4 node

  .n_next_nodes = IP6_FRAG_N_NEXT,
  .next_nodes = {
    [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [IP6_FRAG_NEXT_DROP] = "error-drop"
  },
};