/*---------------------------------------------------------------------------
 * cnat_ipv4_icmp_error_inside_input.c - cnat_ipv4_icmp_error_inside_input node pipeline stage functions
 *
 * Copyright (c) 2008-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------
 */
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vppinfra/error.h>
23 #include <vnet/buffer.h>
25 #include "cnat_ipv4_icmp.h"
/*
 * X-macro list of this node's error counters: each _(symbol, string)
 * pair is expanded below into both the error enum and the matching
 * counter-description string.
 */
#define foreach_cnat_ipv4_icmp_e_inside_input_error \
_(CNAT_V4_ICMP_E_I2O_T_PKT, "cnat v4 icmp_e i2o packet transmit") \
_(CNAT_V4_ICMP_E_I2O_D_PKT, "cnat v4 icmp_e i2o packet drop") \
_(CNAT_V4_ICMP_E_I2O_TTL_DROP, "cnat v4 icmp_e i2o ttl drop")
/*
 * Error-code enum generated from the x-macro list above.
 * NOTE(review): the opening "typedef enum {" line is not visible in this
 * copy of the file - it appears to be a partial extraction.
 */
#define _(sym,str) sym,
foreach_cnat_ipv4_icmp_e_inside_input_error
/* Sentinel: total number of error codes for this node. */
CNAT_IPV4_ICMP_E_INSIDE_INPUT_N_ERROR,
} cnat_ipv4_icmp_e_inside_input_t;
/*
 * Counter-description strings, indexed by cnat_ipv4_icmp_e_inside_input_t
 * and handed to the node registration below.
 * NOTE(review): the closing "};" (and "#undef _") lines are not visible
 * in this copy of the file.
 */
static char * cnat_ipv4_icmp_e_inside_input_error_strings[] = {
#define _(sym,string) string,
foreach_cnat_ipv4_icmp_e_inside_input_error
/*
 * Per-node main struct.
 * NOTE(review): the opening "typedef struct {" line is not visible in
 * this copy of the file.
 */
u32 cached_next_index;          /* cached next-node index for dispatch */
/* $$$$ add data here */

/* convenience variables */
vlib_main_t * vlib_main;        /* set at init time (see init function) */
vnet_main_t * vnet_main;        /* set at init time via vnet_get_main() */
} cnat_ipv4_icmp_e_inside_input_main_t;
/*
 * Next-node dispositions.
 * NOTE(review): the opening "typedef enum {" line and the
 * CNAT_V4_ICMP_E_I2O_T / CNAT_V4_ICMP_E_I2O_D entries referenced by the
 * node registration below are not visible in this copy of the file;
 * only the count sentinel is.
 */
CNAT_V4_ICMP_E_I2O_NEXT,
} cnat_ipv4_icmp_e_inside_input_next_t;
/* Singleton instance of this node's main struct. */
cnat_ipv4_icmp_e_inside_input_main_t cnat_ipv4_icmp_e_inside_input_main;
/* Forward declaration of the node registration defined at file end. */
vlib_node_registration_t cnat_ipv4_icmp_e_inside_input_node;
/*
 * swap_ip_src_emip_dst:
 * In2out translation of an ICMPv4 error packet.  Using the main-db
 * entry, rewrites the embedded (inner) packet's destination
 * address/port and the outer header's source address to the out2in key
 * values, incrementally updating the inner L3 checksum, the outer ICMP
 * checksum and the outer IP checksum.  When static destination NAT is
 * enabled for this vrf, the inner source address (and outer destination
 * address) are additionally translated through the static-dest DB.
 *
 * NOTE(review): this copy of the file is a partial extraction - local
 * declarations, braces, assignment left-hand sides and some checksum
 * macro arguments are missing below.  Comments describe only what the
 * visible code shows; reconcile against the full source before editing.
 */
inline void swap_ip_src_emip_dst(ipv4_header *ip,
                                 icmp_em_ip_info *icmp_info,
                                 cnat_main_db_entry_t *db, u16 vrf)

/* Scratch variables used by the incremental-checksum helper macros. */
CNAT_UPDATE_L3_CHECKSUM_DECLARE
CNAT_UPDATE_ICMP_ERR_CHECKSUM_DECLARE

 * fix inner layer ip & l4 checksum
em_ip = icmp_info->em_ip;
em_port = icmp_info->em_port;

/* Inner IP checksum: swap the in2out address halves for out2in halves. */
CNAT_UPDATE_L3_CHECKSUM(((u16)(db->in2out_key.k.ipv4)),
                        ((u16)(db->in2out_key.k.ipv4 >> 16)),
                        (clib_net_to_host_u16(em_ip->checksum)),
                        ((u16)(db->out2in_key.k.ipv4)),
                        ((u16)(db->out2in_key.k.ipv4 >> 16)))

/* Remember pre-translation values for the outer ICMP checksum fix-up. */
old_ip = clib_net_to_host_u32(em_ip->dest_addr);
old_port = clib_net_to_host_u16(*em_port);
old_ip_checksum = clib_net_to_host_u16(em_ip->checksum);

/* Rewrite inner dest addr, inner checksum and inner dest port to the
 * out2in mapping (assignment targets are on lines missing from view). */
    clib_host_to_net_u32(db->out2in_key.k.ipv4);
    clib_host_to_net_u16(new_l3_c);
    clib_host_to_net_u16(db->out2in_key.k.port);

 * fix outter layer ip & icmp checksum
icmp = icmp_info->icmp;
/* Outer ICMP checksum covers the embedded packet we just rewrote. */
CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)),
                              ((u16)(old_ip >> 16)),
                              (clib_net_to_host_u16(icmp->checksum)),
                              ((u16)(db->out2in_key.k.ipv4 & 0xffff)),
                              ((u16)(db->out2in_key.k.ipv4 >> 16)),
                              ((u16)(db->out2in_key.k.port)),
    clib_host_to_net_u16(new_icmp_c);

/* Outer header: translate the source address to the out2in address. */
old_ip = clib_net_to_host_u32(ip->src_addr);
    clib_host_to_net_u32(db->out2in_key.k.ipv4);

CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)),
                        ((u16)(old_ip >> 16)),
                        (clib_net_to_host_u16(ip->checksum)),
                        ((u16)(db->out2in_key.k.ipv4)),
                        ((u16)(db->out2in_key.k.ipv4 >> 16)))
    clib_host_to_net_u16(new_l3_c);

/* Optional static destination-NAT pass on the embedded source addr. */
if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) {
 * fix inner layer ip & l4 checksum
em_snat_ip = icmp_info->em_ip;
em_snat_port = icmp_info->em_port;

old_ip = spp_net_to_host_byte_order_32(&(em_snat_ip->src_addr));
old_port = spp_net_to_host_byte_order_16(em_snat_port);
old_ip_checksum = spp_net_to_host_byte_order_16(&(em_snat_ip->checksum));

/* Look up the post-mapping address for the inner source address. */
if(cnat_static_dest_db_get_translation(em_snat_ip->src_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) {
old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip);

CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip)),
                        ((u16)(old_ip >> 16)),
                        (spp_net_to_host_byte_order_16(&(em_snat_ip->checksum))),
                        ((u16)(old_postmap_ip)),
                        ((u16)(old_postmap_ip >> 16)))
em_snat_ip->src_addr = postmap_ip;
em_snat_ip->checksum =
    spp_host_to_net_byte_order_16(new_l3_c);

 * fix outter layer ip & icmp checksum
icmp = icmp_info->icmp;
CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)),
                              ((u16)(old_ip >> 16)),
                              (spp_net_to_host_byte_order_16(&(icmp->checksum))),
                              ((u16)(old_postmap_ip & 0xffff)),
                              ((u16)(old_postmap_ip >> 16)),
    spp_host_to_net_byte_order_16(new_icmp_c);

/* Second static dest-NAT pass: translate the OUTER destination address. */
if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) {
if(cnat_static_dest_db_get_translation(ip->dest_addr, &postmap_ip, vrf, direction) == CNAT_SUCCESS) {
old_ip = spp_net_to_host_byte_order_32(&(ip->dest_addr));
old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip);

CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)),
                        ((u16)(old_ip >> 16)),
                        (spp_net_to_host_byte_order_16(&(ip->checksum))),
                        ((u16)(old_postmap_ip & 0xFFFF)),
                        ((u16)(old_postmap_ip >> 16)))
ip->dest_addr = postmap_ip;
/* Outer IP checksum updated for the new destination address. */
    clib_host_to_net_u16(new_l3_c);
/*
 * Use the generic buffer metadata + first line of packet data prefetch
 * stage function from <api/pipeline.h>. This is usually a Good Idea.
 */
/* Pipeline stage 0: the generic buffer-metadata/packet-data prefetch stage. */
#define stage0 generic_stage0
/*
 * Pipeline stage 1: parse the outer IPv4 + ICMP headers and the embedded
 * (inner) IPv4 header, build the in2out lookup key in the buffer opaque
 * from <inner dest addr, inner dest port / ICMP identifier, rx vrf>,
 * hash it, and prefetch the hash bucket for stage 2.
 *
 * NOTE(review): this copy of the file is a partial extraction - the
 * stage's return-type line, local declarations (tmp, bucket,
 * prefetch_target) and some braces are missing from view.
 */
stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index);
ipv4_header *ip = vlib_buffer_get_current (b0);
u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len);
ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */
u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2;

u32 protocol = CNAT_ICMP;

/* Check L4 header for embedded packet */
if (em_ip->protocol == TCP_PROT) {
    /* Inner TCP: key port is the embedded destination port. */
    tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len);
    vnet_buffer(b0)->vcgn_uii.key.k.port =
        clib_net_to_host_u16(tcp->dest_port);
} else if (em_ip->protocol == UDP_PROT) {
    /* Inner UDP: key port is the embedded destination port. */
    udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8 *)em_ip + em_ip_hdr_len);
    vnet_buffer(b0)->vcgn_uii.key.k.port =
        clib_net_to_host_u16(udp->dest_port);
/* Inner ICMP: key port is the echo identifier. */
icmp_v4_t *icmp = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len);
vnet_buffer(b0)->vcgn_uii.key.k.port =
    clib_net_to_host_u16(icmp->identifier);

if (PREDICT_FALSE((icmp->type != ICMPV4_ECHOREPLY) &&
                  (icmp->type != ICMPV4_ECHO))) {
 * Try to set invalid protocol for these cases, so that
 * hash lookup does not return valid main_db. This approach
 * may optimize the regular cases with valid protocols
 * as it avoids one more check for regular cases in stage3
    protocol = CNAT_INVALID_PROTO;

/* Fold <inner dest ip, port, vrf> into the 64-bit lookup key. */
tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 =
    clib_net_to_host_u32(em_ip->dest_addr);

tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32;

PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX],
                         vnet_buffer(b0)->vcgn_uii.key.k.vrf,
tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48;

CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK)

prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]);
/* Stash the bucket so stage 2 can read the chain head. */
vnet_buffer(b0)->vcgn_uii.bucket = bucket;

/* Prefetch the hash bucket */
CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD);
/* Local cache-line size (64B) used to decide whether the two prefetch
 * targets in stage2 fall in different cache lines. */
#define SPP_LOG2_CACHE_LINE_BYTES 6
#define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES)
/*
 * Pipeline stage 2: read the hash-chain head from the bucket prefetched
 * in stage 1, store it back into the buffer opaque, and prefetch the DB
 * entry (one or two cache lines) for the key compare in stage 3.
 *
 * NOTE(review): the stage's return-type line and closing braces are
 * missing from this copy of the file (partial extraction).
 */
stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
uword prefetch_target0, prefetch_target1;
u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket;

/* read the hash bucket */
u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket
    = cnat_in2out_hash[bucket].next;

if (PREDICT_TRUE(db_index != EMPTY)) {
 * Prefetch database keys. We save space by not cache-line
 * aligning the DB entries. We don't want to waste LSU
 * bandwidth prefetching stuff we won't need.
    prefetch_target0 = (uword)(cnat_main_db + db_index);
    CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD);
    /* Just beyond DB key #2 */
    prefetch_target1 = prefetch_target0 +
        STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports);
    /* If the targets are in different lines, do the second prefetch */
    if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) !=
                      (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) {
        CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD);
/*
 * Pipeline stage 3: walk the in2out hash chain whose head was stored in
 * the buffer opaque by stage 2, comparing full 64-bit keys, and write
 * the matching DB index (or EMPTY on a miss) back into the opaque for
 * last_stage to consume.
 *
 * NOTE(review): the stage's return-type line, the "do {" opener of the
 * chain-walk loop and some braces are missing from this copy of the
 * file (partial extraction).
 */
stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
cnat_main_db_entry_t *db;
vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;

 * Note: if the search already failed (empty bucket),
 * the answer is already in the pipeline context structure
if (PREDICT_TRUE(db_index != EMPTY)) {

 * Note: hash collisions suck. We can't easily prefetch around them.
 * The first trip around the track will be fast. After that, maybe
    db = cnat_main_db + db_index;
    /* Full 64-bit key compare: addr + port + vrf in one shot. */
    if (PREDICT_TRUE(db->in2out_key.key64 ==
                     vnet_buffer(b0)->vcgn_uii.key.key64)) {

    db_index = db->in2out_hash.next;
} while (db_index != EMPTY);

/* Stick the answer back into the pipeline context structure */
vnet_buffer(b0)->vcgn_uii.bucket = db_index;
/*
 * last_stage: act on the lookup result from stages 1-3.
 * On a hit: locate the embedded packet's L4 port/identifier field,
 * optionally decrement TTL (dropping ICMP-error packets with TTL <= 1
 * without generating a further ICMP error), then translate the packet
 * and fix all checksums via swap_ip_src_emip_dst().  On a miss: drop.
 * The chosen per-node error counter is incremented at the end.
 *
 * NOTE(review): the function's parameter list tail, several braces and
 * the final next-index/return lines are missing from this copy of the
 * file (partial extraction); comments reflect only the visible code.
 */
static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi);
u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;
/* Default disposition: translate and transmit. */
int disposition = CNAT_V4_ICMP_E_I2O_T;
int counter = CNAT_V4_ICMP_E_I2O_T_PKT;

ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0);
u8 ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len);
ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */
u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2;
vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_e_inside_input_node.index);
u32 node_counter_base_index = n->error_heap_index;
vlib_error_main_t * em = &vm->error_main;
cnat_main_db_entry_t *db = NULL;
icmp_em_ip_info icmp_info;

if (PLATFORM_HANDLE_TTL_DECREMENT) {
    if (PREDICT_FALSE(ip->ttl <= 1)) {
 * As it is ICMP error packet with TTL <= 1,
 * let's drop the packet (no need to genereate
 * another ICMP error).
        disposition = CNAT_V4_ICMP_E_I2O_D;
        counter = CNAT_V4_ICMP_E_I2O_TTL_DROP;

if (PREDICT_TRUE(db_index != EMPTY)) {
    icmp_info.em_ip = em_ip;
    icmp_info.icmp = icmp;
    //icmp_info.em_port = vnet_buffer(b0)->vcgn_uii.key.k.port;

    /* Note: This could have been done in stage1 itself,
     * but we need to introduce one u16 * in vnet_buffer_opaque_t
     * Since this flow is expected to be very rare in actual
     * deployment scenario, we may afford to do these steps here
     * as well. Lets confirm during core review. */
    /* Point em_port at the inner packet's dest-port / identifier field. */
    if (em_ip->protocol == TCP_PROT) {
        tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len);
        icmp_info.em_port = &(tcp->dest_port);
    } else if (em_ip->protocol == UDP_PROT) {
        udp_hdr_type_t *udp = (udp_hdr_type_t *)
            ((u8 *)em_ip + em_ip_hdr_len);
        icmp_info.em_port = &(udp->dest_port);
    icmp_v4_t *icmp_inner = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len);
    icmp_info.em_port = &(icmp_inner->identifier);

    db = cnat_main_db + db_index;
 * 1. update dst addr:dst port of embedded ip pkt
 *    update src addr of icmp pkt
 * 2. fix udp/tcp/ip checksum of embedded pkt
 *    fix icmp, ip check of icmp pkt
 * don need to update the timer

    if (PREDICT_FALSE(icmp_debug_flag)) {
        printf("\nDUMPING ICMP PKT BEFORE\n");

    if (PLATFORM_HANDLE_TTL_DECREMENT) {
 * Decrement TTL and update IPv4 checksum
        ipv4_decr_ttl_n_calc_csum(ip);

    /* Perform the in2out translation and all checksum fix-ups. */
    swap_ip_src_emip_dst(ip, &icmp_info,
                         db, db->in2out_key.k.vrf);

    if (PREDICT_FALSE(icmp_debug_flag)) {
        printf("\nDUMPING ICMP PKT AFTER\n");
    in2out_forwarding_count++;

    /* Lookup miss: drop the packet. */
    disposition = CNAT_V4_ICMP_E_I2O_D;
    counter = CNAT_V4_ICMP_E_I2O_D_PKT;

/* Bump the error/packet counter chosen above. */
em->counters[node_counter_base_index + counter] += 1;
438 #include <vnet/pipeline.h>
440 static uword cnat_ipv4_icmp_e_inside_input_node_fn (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * frame)
444 return dispatch_pipeline (vm, node, frame);
448 VLIB_REGISTER_NODE (cnat_ipv4_icmp_e_inside_input_node) = {
449 .function = cnat_ipv4_icmp_e_inside_input_node_fn,
450 .name = "vcgn-v4-icmp-e-i2o",
451 .vector_size = sizeof (u32),
452 .type = VLIB_NODE_TYPE_INTERNAL,
454 .n_errors = ARRAY_LEN(cnat_ipv4_icmp_e_inside_input_error_strings),
455 .error_strings = cnat_ipv4_icmp_e_inside_input_error_strings,
457 .n_next_nodes = CNAT_V4_ICMP_E_I2O_NEXT,
459 /* edit / add dispositions here */
461 [CNAT_V4_ICMP_E_I2O_T] = "ip4-input",
462 [CNAT_V4_ICMP_E_I2O_D] = "error-drop",
466 clib_error_t *cnat_ipv4_icmp_e_inside_input_init (vlib_main_t *vm)
468 cnat_ipv4_icmp_e_inside_input_main_t * mp = &cnat_ipv4_icmp_e_inside_input_main;
471 mp->vnet_main = vnet_get_main();
476 VLIB_INIT_FUNCTION (cnat_ipv4_icmp_e_inside_input_init);