2 *---------------------------------------------------------------------------
3 * cnat_v4_tcp_in2out_stages.c - cnat_v4_tcp_in2out node pipeline stage functions
6 * Copyright (c) 2008-2014 Cisco and/or its affiliates.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at:
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *---------------------------------------------------------------------------
21 #include <vlib/vlib.h>
22 #include <vnet/vnet.h>
23 #include <vppinfra/error.h>
24 #include <vnet/buffer.h>
27 /* #include <cnat_feature_data.h> */
28 #include "ipv4_packet.h"
29 #include "tcp_header_definitions.h"
30 #include "cnat_config.h"
31 #include "cnat_global.h"
32 #include "cnat_v4_functions.h"
33 #include "cnat_v4_ftp_alg.h"
34 #include "cnat_v4_pptp_alg.h"
/*
 * X-macro list of this node's counters. Each _(symbol, description) entry is
 * expanded twice further down: once with `_` defined to emit the symbol
 * (enum members) and once with `_` defined to emit the description string.
 * Keeping both in one list keeps symbols and strings in the same order.
 */
36 #define foreach_cnat_ipv4_tcp_inside_input_error \
37 _(TCP_NAT_IN, "packets received") \
38 _(TCP_NAT, "packets NATed") \
39 _(TCP_EXCEPTION, "packets to exception") \
40 _(TCP_TTL_GEN, "Generated TTL Expiry ICMP packet") \
41 _(TCP_TTL_DROP, "Could not generate TTL Expiry ICMP packet") \
42 _(TCP_SESSION_DROP, "Could not generate session") \
43 _(TCP_FRAG_DROP, "Non-first Fragment received")
// Counter symbols: `_` is defined to emit only the symbol of each list entry,
// so the list expands to one enum member per counter.
// NOTE(review): the line that opens this enum is not visible in this listing.
46 #define _(sym,str) sym,
47 foreach_cnat_ipv4_tcp_inside_input_error
// Placed after the last list entry; presumably the number of counters
// (assumes the enum starts at 0 - confirm against the opening line).
49 CNAT_IPV4_TCP_INSIDE_INPUT_N_ERROR,
50 } cnat_ipv4_tcp_inside_input_t;
// Description strings for the counters, produced from the same X-macro list
// as the enum symbols and therefore in the same order as them.
52 static char * cnat_ipv4_tcp_inside_input_error_strings[] = {
// Here `_` emits only the description string of each list entry.
53 #define _(sym,string) string,
54 foreach_cnat_ipv4_tcp_inside_input_error
/*
 * Per-packet scratch data that the pipeline stages pass to one another
 * through the extra `pctx` stage argument (indexed by the stage `index`).
 */
// NOTE(review): the stages read and write pctx[index].bucket, but no `bucket`
// member appears among the visible lines of this struct; its declaration
// seems to be missing from this listing - confirm against the real file.
59 typedef struct cnat_v4_tcp_in2out_pipeline_data_ {
60 spp_node_main_vector_t *nmv;
61 /* Add additional pipeline stage data here... */
// Ports in host byte order as stored by stage2; dst_port is later used by
// stage5 to build the destination key.
63 u16 src_port; /* Added for handling fragments */
64 u16 dst_port; /* Added for handling fragments */
65 } cnat_v4_tcp_in2out_pipeline_data_t;
// Backing storage for the stage scratch data: SPP_MAXDISPATCH slots.
67 static cnat_v4_tcp_in2out_pipeline_data_t pctx_data[SPP_MAXDISPATCH];
// Appended to every stage's parameter list: adds the `pctx` scratch-data parameter.
69 #define EXTRA_PIPELINE_ARGS_PROTO , cnat_v4_tcp_in2out_pipeline_data_t *pctx
// Appended to an argument list to pass `pctx` through as the matching argument.
70 #define EXTRA_PIPELINE_ARGS , pctx
/*
 * stage0 - first pipeline stage: issue a prefetch of the context header for
 * the packet context at ctxs[index].
 *
 * ctxs      array of packet contexts being pipelined
 * index     position of the context this call works on
 * np, disp_used, pctx   not used by the visible lines of this stage
 */
74 stage0(spp_ctx_t **ctxs, int index, spp_node_t *np,
75 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO))
77 spp_ctx_t *ctx = ctxs[index];
79 * Prefetch the context header. This is almost always
80 * the right thing to do
82 SPP_PREFETCH_CTX(ctx);
/*
 * stage1 - second pipeline stage: issue a prefetch of the packet data that
 * belongs to ctxs[index] (one CACHE_DATA_QUANTUM), so that the next stage
 * can parse the headers.
 *
 * ctxs      array of packet contexts being pipelined
 * index     position of the context this call works on
 * np, disp_used, pctx   not used by the visible lines of this stage
 */
87 stage1(spp_ctx_t **ctxs, int index, spp_node_t *np,
88 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO))
90 spp_ctx_t *ctx = ctxs[index];
91 /* got ctx, prefetch packet data separately */
92 SPP_PREFETCH_CTX_DATA(ctx, 1*CACHE_DATA_QUANTUM);
/*
 * stage2 - build the in2out lookup key for ctxs[index] and prefetch its
 * hash bucket.
 *
 * The key (VRF, IPv4 source address, source port) is written into the
 * context's feature data (fd->dbl.k), the destination port is saved in
 * pctx[index].dst_port, and the hash bucket number computed from the key is
 * saved in pctx[index].bucket for stage3. Also counts the packet in
 * TCP_NAT_IN.
 *
 * ctxs      array of packet contexts being pipelined
 * index     position of the context (and of its pctx slot) to work on
 * np        node whose counters are incremented
 * pctx      per-packet scratch data shared between stages
 */
97 stage2(spp_ctx_t **ctxs, int index, spp_node_t *np,
98 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO))
100 spp_ctx_t *ctx = ctxs[index];
103 cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data;
108 INCREMENT_NODE_COUNTER(np, TCP_NAT_IN);
110 /* extract the key from ctx and save it to feature_data */
112 ip = (ipv4_header *)(ctx->current_header);
// The low nibble is the IPv4 header length in 32-bit words; << 2 turns it
// into the byte offset of the TCP header.
113 ctx->application_start = (ip->version_hdr_len_words & 0xf) << 2;
114 tcp = (tcp_hdr_type*) ((u8 *)ip + ctx->application_start);
116 PLATFORM_CNAT_SET_RX_VRF(ctx,fd->dbl.k.k.vrf, CNAT_TCP, 1);
117 fd->dbl.k.k.ipv4 = spp_net_to_host_byte_order_32(&ip->src_addr);
// Fragment path: the ports are taken from ctx->feature_data[2] (two
// consecutive u16 values, source then destination) instead of the TCP header.
119 if(PREDICT_FALSE(ctx->ru.rx.frag)) {
120 /* Must have routed through cnat_v4_frag_in2out node
121 * Since feature data of the ctx is being used for other
122 * purposes here, copy them to extra stage argument
124 u16 *feature_data_ports = (u16 *)&ctx->feature_data[2];
125 pctx[index].src_port = fd->dbl.k.k.port = *feature_data_ports;
126 feature_data_ports++;
127 pctx[index].dst_port = *feature_data_ports;
// Non-fragment path: ports are read from the TCP header.
// NOTE(review): pctx[index].src_port is only written on the fragment path in
// the visible lines; on this path it keeps whatever the slot held before.
129 fd->dbl.k.k.port = spp_net_to_host_byte_order_16(&tcp->src_port);
130 pctx[index].dst_port =
131 spp_net_to_host_byte_order_16(&tcp->dest_port);
135 /* extra info for evil mode, or default value for dst_ipv4 field in good mode */
136 fd->dbl.dst_ipv4 = address_dependent_filtering ?
137 spp_net_to_host_byte_order_32(&ip->dest_addr) : 0;
// Hash the 64-bit key into a bucket number (masked with CNAT_MAIN_HASH_MASK).
140 CNAT_V4_GET_HASH(fd->dbl.k.key64,
141 bucket, CNAT_MAIN_HASH_MASK)
143 prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]);
// Hand the bucket number to stage3 through the scratch slot.
144 pctx[index].bucket = bucket;
146 /* Prefetch the hash bucket */
147 SPP_PREFETCH(prefetch_target, 0, LOAD);
/*
 * stage3 - read the hash bucket chosen by stage2 and prefetch the first
 * main-DB entry it points to.
 *
 * Takes the bucket number from pctx[index].bucket, replaces it with the
 * bucket's first DB index (cnat_in2out_hash[bucket].next, possibly EMPTY)
 * and, when the bucket is not empty, prefetches the cache line(s) that hold
 * the entry's keys.
 *
 * index     position of the pctx slot to work on
 * pctx      per-packet scratch data shared between stages
 * ctxs, np, disp_used   not used by the visible lines of this stage
 */
153 stage3(spp_ctx_t **ctxs, int index, spp_node_t *np,
154 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO))
158 uword prefetch_target0, prefetch_target1;
160 bucket = pctx[index].bucket;
162 /* read the hash bucket */
// pctx[index].bucket is reused: from here on it holds a main-DB index
// (or EMPTY), no longer a hash bucket number.
163 db_index = pctx[index].bucket = cnat_in2out_hash[bucket].next;
164 if (PREDICT_TRUE(db_index != EMPTY)) {
167 * Prefetch database keys. We save space by not cache-line
168 * aligning the DB entries. We don't want to waste LSU
169 * bandwidth prefetching stuff we won't need.
172 prefetch_target0 = (uword)(cnat_main_db + db_index);
174 SPP_PREFETCH(prefetch_target0, 0, LOAD);
176 /* Just beyond DB key #2 */
// The offset of user_ports marks the end of the key area to be prefetched.
178 prefetch_target1 = prefetch_target0 +
179 STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports);
181 /* If the targets are in different lines, do the second prefetch */
// Both addresses are rounded down to their cache-line start for the comparison.
183 if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) !=
184 (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) {
186 SPP_PREFETCH(prefetch_target1, 0, LOAD);
/*
 * stage4 - walk the hash collision chain looking for the main-DB entry
 * whose in2out key equals the key stage2 stored in the feature data.
 *
 * Starts from the DB index left in pctx[index].bucket by stage3, follows
 * in2out_hash.next links while the index is not EMPTY, and writes the
 * resulting index back to pctx[index].bucket for stage5.
 *
 * ctxs      array of packet contexts being pipelined
 * index     position of the context (and of its pctx slot) to work on
 * pctx      per-packet scratch data shared between stages
 * np, disp_used   not used by the visible lines of this stage
 */
193 stage4(spp_ctx_t **ctxs, int index, spp_node_t *np,
194 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO)
196 spp_ctx_t *ctx = ctxs[index];
197 u32 db_index = pctx[index].bucket;
198 cnat_main_db_entry_t *db;
199 cnat_feature_data_t *fd;
202 * Note: if the search already failed (empty bucket),
203 * the answer is already in the pipeline context structure
// NOTE(review): the body of this early-out is not visible in this listing.
205 if (PREDICT_FALSE(db_index == EMPTY)) {
209 fd = (cnat_feature_data_t *)ctx->feature_data;
212 * Note: hash collisions suck. We can't easily prefetch around them.
213 * The first trip around the track will be fast. After that, maybe
218 db = cnat_main_db + db_index;
// Compare the whole key (vrf, address, port) in one 64-bit comparison.
// NOTE(review): the statement executed on a match is not visible in this
// listing (presumably it ends the walk - confirm).
219 if (PREDICT_TRUE(db->in2out_key.key64 == fd->dbl.k.key64))
221 db_index = db->in2out_hash.next;
223 } while (db_index != EMPTY);
225 /* even in evil mode, for in2out, we nat all packets regardless mode and dst_ip */
227 /* Stick the answer back into the pipeline context structure */
228 pctx[index].bucket = db_index;
/*
 * stage5 - final pipeline stage: choose the disposition for the packet at
 * ctxs[index] and, when stage4 found a main-DB entry, translate the packet.
 *
 * Flow of the visible code:
 *  - When PLATFORM_HANDLE_TTL_DECREMENT is set and TTL <= 1: try to generate
 *    an ICMP error (TCP_TTL_GEN), otherwise drop (TCP_TTL_DROP).
 *  - db_index == EMPTY: non-first fragments are dropped (TCP_FRAG_DROP); all
 *    other packets go to CNAT_V4_TCP_IE (TCP_EXCEPTION).
 *  - Otherwise: record the destination in the main DB or in a session DB
 *    entry, track window / sequence / ack numbers, apply the FTP or RTSP ALG
 *    sequence-number adjustment, run the TTL / NAT / checksum rewrite, handle
 *    PPTP control messages and refresh the entry timers.
 *  - Finally store db_index in the feature data and push the context with
 *    the chosen disposition.
 *
 * ctxs       array of packet contexts being pipelined
 * index      position of the context (and of its pctx slot) to work on
 * np         node whose counters are incremented
 * disp_used  passed through to DISP_PUSH_CTX
 * pctx       per-packet scratch data; supplies db_index and dst_port
 *
 * Change relative to the previous revision: in the RTSP branch the
 * session-DB path now adds the session entry's own alg_dlt[] values (it used
 * the main-DB entry's values while comparing against the session entry's
 * sequence number), matching the FTP branch and the delta test that guards
 * the adjustment.
 */
233 stage5(spp_ctx_t **ctxs, int index, spp_node_t *np,
234 u8 *disp_used EXTRA_PIPELINE_ARGS_PROTO))
236 spp_ctx_t *ctx = ctxs[index];
237 u32 db_index = pctx[index].bucket;
238 cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data;
240 cnat_main_db_entry_t *db;
241 /* Below two pointers are just to keep the cnat_ftp_alg call happy*/
242 dslite_table_entry_t *dslite_entry_ptr = NULL;
243 ipv6_header_t *ipv6_hdr = NULL;
252 ip = (ipv4_header *) ctx->current_header;
254 if (PLATFORM_HANDLE_TTL_DECREMENT) {
255 if (PREDICT_FALSE(ip->ttl <= 1)) {
256 /* Try to generate ICMP error msg, as TTL is <= 1 */
258 if (icmpv4_generate_with_throttling
259 (ctx, ip, ctx->ru.rx.uidb_index)) {
261 disposition = CNAT_REWRITE_OUTPUT;
262 INCREMENT_NODE_COUNTER(np, TCP_TTL_GEN);
264 /* Could not generate ICMP - drop the packet */
265 disposition = CNAT_DROP;
266 INCREMENT_NODE_COUNTER(np, TCP_TTL_DROP);
// No translation entry was found by stage4.
272 if (PREDICT_FALSE(db_index == EMPTY)) {
273 if(PREDICT_FALSE(ctx->ru.rx.frag)) {
274 /* Must have routed through cnat_v4_frag_in2out node */
276 spp_net_to_host_byte_order_16(&(ip->frag_flags_offset));
// A non-zero fragment offset means this is not the first fragment.
277 if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) {
278 INCREMENT_NODE_COUNTER(np, TCP_FRAG_DROP);
279 disposition = CNAT_DROP;
282 INCREMENT_NODE_COUNTER(np, TCP_EXCEPTION);
283 disposition = CNAT_V4_TCP_IE;
286 INCREMENT_NODE_COUNTER(np, TCP_EXCEPTION);
287 disposition = CNAT_V4_TCP_IE;
// A translation entry exists: build the destination key for session tracking.
290 cnat_key_t dest_info;
291 cnat_session_entry_t *session_db = NULL;
292 db = cnat_main_db + db_index;
293 /* Handle destination sessions */
294 tcp = (tcp_hdr_type*) ((u8*)ip + ctx->application_start);
295 dest_info.k.port = pctx[index].dst_port;
296 dest_info.k.ipv4 = spp_net_to_host_byte_order_32(&(ip->dest_addr));
298 if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) {
300 /* No DBL support, so just update the destn and proceed */
301 db->dst_ipv4 = dest_info.k.ipv4;
302 db->dst_port = dest_info.k.port;
306 if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4 ||
307 db->dst_port != dest_info.k.port)) {
308 if(PREDICT_TRUE(db->nsessions == 0)) {
309 /* Should be a static entry
310 * Note this session as the first session and log
312 cnat_add_dest_n_log(db, &dest_info);
313 } else if(PREDICT_FALSE(db->nsessions == 1)) {
314 /* Destn is not same as in main db. Multiple session
317 dest_info.k.vrf = db->in2out_key.k.vrf;
318 session_db = cnat_handle_1to2_session(db, &dest_info);
319 if(PREDICT_FALSE(session_db == NULL)) {
320 disposition = CNAT_DROP;
321 INCREMENT_NODE_COUNTER(np, TCP_SESSION_DROP);
324 } else { /* There are already multiple destinations */
325 dest_info.k.vrf = db->in2out_key.k.vrf;
326 /* If session already exists,
327 * cnat_create_session_db_entry will return the existing db
328 * else create a new db
329 * If could not create, return NULL
331 session_db = cnat_create_session_db_entry(&dest_info,
333 if(PREDICT_FALSE(session_db == NULL)) {
334 disposition = CNAT_DROP;
335 INCREMENT_NODE_COUNTER(np, TCP_SESSION_DROP);
339 if(PREDICT_TRUE(session_db)) {
340 /* Have to repeat the window size check for new destinations */
341 window = (u32)spp_net_to_host_byte_order_16(
343 window = window << session_db->scale;
344 if(PREDICT_TRUE(!session_db->window)) {
345 calculate_window_scale(tcp, &scale);
346 session_db->scale = scale;
347 session_db->window = window;
348 } else if (PREDICT_FALSE(session_db->window <
350 /* Update the db entry with window option from packet */
351 session_db->window = window;
355 session_db->tcp_seq_num = spp_net_to_host_byte_order_32(
357 session_db->ack_no = spp_net_to_host_byte_order_32(
// NOTE(review): logical && with a constant; if CNAT_DEBUG_GLOBAL_ALL is a
// bit mask this was presumably meant to be a bitwise & test - confirm.
359 if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) {
360 PLATFORM_DEBUG_PRINT("\n In2out SDB stages seq no = %u,"
361 " ack no = %u, window = %u\n",
362 session_db->tcp_seq_num,
368 //Update the seq no and ack no for subsequent communication
369 //after connection establishment
370 //No need to update window here. Window is already updated
371 //during connection establishment
372 window = (u32)spp_net_to_host_byte_order_16(
374 window = window << db->scale;
375 if(PREDICT_FALSE(!ALG_ENABLED_DB(db))) {
376 //This check is done since proto_data is part of union in main
378 db->proto_data.tcp_seq_chk.seq_no =
379 spp_net_to_host_byte_order_32(
381 db->proto_data.tcp_seq_chk.ack_no =
382 spp_net_to_host_byte_order_32(
385 if (PREDICT_FALSE(db->diff_window < window)) {
386 /* Update the db entry with window option from packet */
387 db->diff_window = window;
// NOTE(review): same && versus & question as the session-DB debug print.
389 if (PREDICT_FALSE(global_debug_flag && CNAT_DEBUG_GLOBAL_ALL)) {
390 PLATFORM_DEBUG_PRINT("\n In2out MainDB seq no = %u,"
392 db->proto_data.tcp_seq_chk.seq_no,
393 db->proto_data.tcp_seq_chk.ack_no);
394 PLATFORM_DEBUG_PRINT("\n In2out MAINDB window = %u\n",
400 INCREMENT_NODE_COUNTER(np, TCP_NAT);
402 disposition = CNAT_REWRITE_OUTPUT;
404 /* NAT the packet and update checksum (incremental) */
406 /* If it is a non-first fragment, we need not worry about
407 * ALGs as the packet does not have TCP header..
408 * However, under a very race scenario when this non-first
409 * fragment is containing an FTP PORT command OR RTSP command
410 * we cannot handle that case.. in that case the ALG will fail
411 * Do not want to add a lot of complexity to handle one in million
415 spp_net_to_host_byte_order_16(&(ip->frag_flags_offset));
417 if(PREDICT_FALSE(frag_offset & IP_FRAG_OFFSET_MASK)) {
418 /* Non first fragment.. no TCP header */
419 FTP_ALG_DEBUG_PRINTF("Non first frag.. cannot handle ALG");
420 goto handle_ttl_n_checksum;
423 FTP_ALG_DEBUG_PRINTF("src port 0x%x, dst_port 0x%x",
424 spp_net_to_host_byte_order_16(&tcp->src_port),
425 spp_net_to_host_byte_order_16(&tcp->dest_port))
// FTP ALG: only for the FTP control port (21) on either side.
428 if (PREDICT_FALSE(ftp_alg_enabled &&
429 (spp_net_to_host_byte_order_16(&tcp->src_port) == 21 ||
430 spp_net_to_host_byte_order_16(&tcp->dest_port) == 21))) {
432 if(PREDICT_FALSE((db->flags & CNAT_DB_FLAG_PPTP_TUNNEL_ACTIVE) ||
433 (db->flags & CNAT_DB_FLAG_PPTP_TUNNEL_INIT)))
435 /* FTP on a PPTP Control session? Ignore FTP */
436 goto handle_ttl_n_checksum;
439 if (PREDICT_FALSE(tcp->flags & (TCP_FLAG_SYN | TCP_FLAG_RST |
442 FTP_ALG_DEBUG_PRINTF("SYN Case setting delta = 0")
444 /* reset the delta */
445 if(PREDICT_FALSE(session_db != NULL)) {
446 session_db->alg.delta = 0;
453 /* need to adjust seq # for in2out pkt if delta is not 0 */
454 if (PREDICT_TRUE((session_db && (session_db->alg.delta != 0))
455 || ((!session_db) && (db->alg.delta != 0)))) {
456 seq = net2host32(&tcp->seq_num);
458 FTP_ALG_DEBUG_PRINTF("Orig Seq Num 0x%x", seq)
460 * for ftp packets, due to PORT command translation,
461 * we may have cases that a packet/payload len gets
462 * changed for tcp, we need to adjust the packet's
463 * sequence numbers to match the changes. The delta
464 * of orig pkt len and new len is in alg_dlt[1] together
465 * with the sequence number that cuased the delta. When
466 * there are multiple len changes, we keep theprevious
467 * delta in alg_dlt[0] for case like pkt retransmission.
468 * So depends on packet seq number, we decide to use
469 * either latest delta or previous delta ([0])
470 * We won't be here if both delta values are 0
472 if(PREDICT_FALSE(session_db != NULL)) {
473 seq1 = seq > session_db->tcp_seq_num ?
474 (seq + session_db->alg.alg_dlt[1]):
475 (seq + session_db->alg.alg_dlt[0]);
477 seq1 = seq > db->proto_data.seq_pcp.tcp_seq_num ?
478 (seq + db->alg.alg_dlt[1]):
479 (seq + db->alg.alg_dlt[0]);
482 FTP_ALG_DEBUG_PRINTF("Old_seq_num 0x%x New Seq Num 0x%x",
485 if (PREDICT_TRUE(seq1 != seq)) {
487 tcp->seq_num = host2net32(seq1);
489 FTP_ALG_DEBUG_PRINTF("Old TCP Checksum 0x%x",
490 net2host16(&tcp->tcp_checksum))
493 * fix checksum incremental for seq # changes
494 * newchecksum = ~(~oldchecksum + ~old + new)
496 CNAT_UPDATE_TCP_SEQ_ACK_CHECKSUM(seq, seq1)
497 } /* There is a diff in seq */
499 } /* ALG Delta is non zero */
501 rc = cnat_ftp_alg((u8*) ip, &delta, db, dslite_entry_ptr, ipv6_hdr);
503 FTP_ALG_DEBUG_PRINTF("cnat_ftp_alg rc 0x%x", rc)
505 /*if located PORT cmd, packet being updated, take the delta and seq # */
506 if (PREDICT_FALSE(rc)) {
508 /* set alg flag for this ftp control connection */
509 if(PREDICT_FALSE(session_db != NULL)) {
510 session_db->flags |= CNAT_DB_FLAG_ALG_CTRL_FLOW;
512 db->flags |= CNAT_DB_FLAG_ALG_CTRL_FLOW;
516 * rc != 0 indicates this packet has triggered a new pkt len delta
517 * we need to update db entry's seq# with seq# of this packet.
519 * Move alg_dlt[1] to [0], (current delta -> previous delta)
520 * then apply latest delta to alg_dlt[1] (keep [1] as latest delta)
522 if(PREDICT_FALSE(session_db != NULL)) {
523 session_db->tcp_seq_num = net2host32(&tcp->seq_num);
524 session_db->alg.alg_dlt[0] = session_db->alg.alg_dlt[1];
526 /* accumulate the delta ! */
527 session_db->alg.alg_dlt[1] += delta;
528 FTP_ALG_DEBUG_PRINTF(
529 "cnat_ftp_alg seq_num 0x%x, dlt0 0x%x, dlt1 0x%x",
530 session_db->tcp_seq_num,
531 session_db->alg.alg_dlt[0],
532 session_db->alg.alg_dlt[1])
535 db->proto_data.seq_pcp.tcp_seq_num = net2host32(&tcp->seq_num);
536 db->alg.alg_dlt[0] = db->alg.alg_dlt[1];
538 /* accumulate the delta ! */
539 db->alg.alg_dlt[1] += delta;
541 FTP_ALG_DEBUG_PRINTF(
542 "cnat_ftp_alg seq_num 0x%x, dlt0 0x%x, dlt1 0x%x",
543 db->proto_data.seq_pcp.tcp_seq_num,
// The ALG changed the payload size; keep the context length in step.
547 ctx->current_length += delta;
548 }/* cnat_ftp_alg returned non zero */
549 } /* It is not a SYN, RST or FIN */
// RTSP ALG: only when a port is configured and it matches either side.
550 } else if (PREDICT_FALSE(rtsp_alg_port_num &&
551 ((spp_net_to_host_byte_order_16(&tcp->dest_port) == rtsp_alg_port_num) ||
552 (spp_net_to_host_byte_order_16(&tcp->src_port) == rtsp_alg_port_num))) ) {
554 if (PREDICT_FALSE(tcp->flags & (TCP_FLAG_SYN | TCP_FLAG_RST |
557 FTP_ALG_DEBUG_PRINTF("SYN Case setting delta = 0")
559 /* reset the delta */
560 if(PREDICT_FALSE(session_db != NULL)) {
561 session_db->alg.delta = 0;
567 #define RTSP_ALG_DELTA_MASK 0xFF
568 /* need to adjust seq # for in2out pkt if delta is not 0 */
569 if (PREDICT_FALSE((session_db &&
570 (session_db->alg.delta & RTSP_ALG_DELTA_MASK) != 0) ||
572 (db->alg.delta & RTSP_ALG_DELTA_MASK) != 0))) {
573 seq = net2host32(&tcp->seq_num);
// Use the delta pair of the entry that owns this flow. The session-DB path
// must read session_db->alg (as the guarding test and the FTP branch do),
// not the main-DB entry's deltas.
575 if(PREDICT_FALSE(session_db != NULL)) {
576 seq1 = seq > session_db->tcp_seq_num ?
577 (seq + session_db->alg.alg_dlt[1]):
578 (seq + session_db->alg.alg_dlt[0]);
580 seq1 = seq > db->proto_data.seq_pcp.tcp_seq_num ?
581 (seq + db->alg.alg_dlt[1]):
582 (seq + db->alg.alg_dlt[0]);
585 FTP_ALG_DEBUG_PRINTF("Old_seq_num 0x%x New Seq Num 0x%x",
588 if (PREDICT_TRUE(seq1 != seq)) {
590 tcp->seq_num = host2net32(seq1);
592 FTP_ALG_DEBUG_PRINTF("Old TCP Checksum 0x%x",
593 net2host16(&tcp->tcp_checksum))
596 * fix checksum incremental for seq # changes
597 * newchecksum = ~(~oldchecksum + ~old + new)
599 CNAT_UPDATE_TCP_SEQ_ACK_CHECKSUM(seq, seq1)
604 if ((session_db && (!session_db->alg.il)) ||
605 ((!session_db) && (!db->alg.il))) {
606 cnat_rtsp_alg((u8*) ip,
// Common tail for all translated packets, including those that skipped the ALGs.
614 handle_ttl_n_checksum:
615 if (PLATFORM_HANDLE_TTL_DECREMENT) {
617 * Decrement TTL and update IPv4 checksum
619 ipv4_decr_ttl_n_calc_csum(ip);
622 tcp_in2out_nat_mss_n_checksum(ip,
624 db->out2in_key.k.ipv4,
625 db->out2in_key.k.port,
627 /* CNAT_PPTP_ALG_SUPPORT */
628 /* code to handle pptp control msgs */
630 (spp_net_to_host_byte_order_16(&tcp->dest_port) ==
635 PPTP_DBG(3, "PPTP mgmt/ctrl msg recieved");
637 ret = cnat_handle_pptp_msg(ctx, db , tcp, PPTP_PNS );
639 if( PREDICT_FALSE( ret != CNAT_SUCCESS) ) {
640 PPTP_DBG(3, "PPTP mgmt/ctrl msg drop");
641 disposition = CNAT_DROP;
642 PPTP_INCR(ctrl_msg_drops);
647 /* CNAT_PPTP_ALG_SUPPORT */
649 /* update translation counters */
652 in2out_forwarding_count++;
654 PLATFORM_CNAT_SET_TX_VRF(ctx,db->out2in_key.k.vrf);
656 /* update the timer for good mode, or evil mode dst_ip match */
658 // if (!address_dependent_filtering || fd->dbl.dst_ipv4 == db->dst_ipv4) {
// Refresh TCP state flags and the timeout on whichever entry tracks this flow.
659 if(PREDICT_FALSE(session_db != NULL)) {
660 V4_TCP_UPDATE_SESSION_DB_FLAG(session_db, tcp);
661 CNAT_DB_TIMEOUT_RST(session_db);
663 V4_TCP_UPDATE_SESSION_FLAG(db, tcp);
664 CNAT_DB_TIMEOUT_RST(db);
671 /* Pick up the answer and put it into the context */
672 fd->dbl.db_index = db_index;
676 DISP_PUSH_CTX(np, ctx, disposition, disp_used, last_disposition, last_contexts_ptr, last_nused_ptr);