Initial commit of vpp code.
[vpp.git] / vnet / vnet / vcgn / cnat_ipv4_tcp_inside_input.c
1 /* 
2  *---------------------------------------------------------------------------
3  * cnat_ipv4_tcp_inside_input.c - cnat_ipv4_tcp_inside_input node pipeline 
4  * stage functions
5  *
6  *
7  * Copyright (c) 2008-2014 Cisco and/or its affiliates.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at:
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *---------------------------------------------------------------------------
20  */
21
22 #include <vlib/vlib.h>
23 #include <vnet/vnet.h>
24 #include <vppinfra/error.h>
25 #include <vnet/buffer.h>
26
27 #include "cnat_db.h"
28 #include "tcp_header_definitions.h"
29 #include "cnat_config.h"
30 #include "cnat_global.h"
31 #include "cnat_v4_functions.h"
32
/*
 * Counter list for the vcgn TCP inside-to-outside (i2o) node.
 *
 * Every _(symbol, description) entry is expanded twice further down: once
 * into an enumerator of the counter enum and once into the matching entry
 * of the counter description table.
 *
 * NOTE(review): the last symbol has a UDP-flavoured name although it lives
 * in the TCP node, and nothing in the visible code increments it.
 */
#define foreach_cnat_ipv4_tcp_inside_input_error                              \
  _(CNAT_V4_TCP_I2O_PKT_IN,       "tcp i2o packets received")                 \
  _(CNAT_V4_TCP_I2O_PKT_T,        "tcp i2o packets natted")                   \
  _(CNAT_V4_TCP_I2O_EXCEPTION,    "packets to tcp i2o exception")             \
  _(CNAT_V4_TCP_I2O_TTL_GEN,      "generated TTL expiry ICMP packets")        \
  _(CNAT_V4_TCP_I2O_TTL_GEN_DROP, "could not generate TTL expiry ICMP packets") \
  _(CNAT_V4_TCP_I2O_SESSION_DROP, "could not generate session")               \
  _(CNAT_V4_UDP_I2O_FRAG_DROP,    "non-first fragment drop")
41
42 typedef enum {
43 #define _(sym,str) sym,
44   foreach_cnat_ipv4_tcp_inside_input_error 
45 #undef _
46   CNAT_IPV4_TCP_INSIDE_INPUT_N_ERROR,
47 } cnat_ipv4_tcp_inside_input_t;
48
49 static char * cnat_ipv4_tcp_inside_input_error_strings[] = {
50 #define _(sym,string) string,
51   foreach_cnat_ipv4_tcp_inside_input_error
52 #undef _
53 };
54
55 typedef struct {
56   u32 cached_next_index;
57   /* $$$$ add data here */
58
59   /* convenience variables */
60   vlib_main_t * vlib_main;
61   vnet_main_t * vnet_main;
62 } cnat_ipv4_tcp_inside_input_main_t;
63
/*
 * Next-node dispositions returned by the last pipeline stage. The values
 * index the next_nodes[] table of the node registration.
 */
typedef enum {
    CNAT_V4_TCP_I2O_E = 0,  /* exception path ("vcgn-v4-tcp-i2o-e") */
    CNAT_V4_TCP_I2O_T,      /* translated packet ("ip4-input") */
    CNAT_V4_TCP_I2O_D,      /* drop ("error-drop") */
    CNAT_V4_TCP_I2O_NEXT,   /* number of dispositions; keep last */
} cnat_ipv4_tcp_inside_input_next_t;
70
71 #define CNAT_REWRITE_OUTPUT CNAT_V4_TCP_I2O_T
72 #define CNAT_V4_ICMP_GEN CNAT_V4_TCP_I2O_D
73
74 //#define CNAT_V4_TCP_I2O_E CNAT_V4_TCP_I2O_D //remove it once exception node is created
75 cnat_ipv4_tcp_inside_input_main_t cnat_ipv4_tcp_inside_input_main;
76 vlib_node_registration_t cnat_ipv4_tcp_inside_input_node;
77
78 #define NSTAGES 6
79
80 /*
81  * Use the generic buffer metadata + first line of packet data prefetch
82  * stage function from <api/pipeline.h>. This is usually a Good Idea.
83  */
84 #define stage0 generic_stage0
85
86
87 static inline void
88 stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
89 {
90     u64 a, b, c;
91     u32 bucket;
92     u8 *prefetch_target;
93     //cnat_feature_data_t *fd = (cnat_feature_data_t *)ctx->feature_data;
94
95
96     vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index);
97     ipv4_header *ip = vlib_buffer_get_current (b0);
98     u8   ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
99     tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len);
100   
101     u64 tmp = 0;
102     tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 =
103             clib_net_to_host_u32(ip->src_addr);
104     vnet_buffer(b0)->vcgn_uii.key.k.port =
105             clib_net_to_host_u16 (tcp->src_port);
106
107     tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32;
108
109     PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX],
110                              vnet_buffer(b0)->vcgn_uii.key.k.vrf,
111                              CNAT_TCP)
112     tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48;
113
114     CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK)
115
116     prefetch_target = (u8 *)(&cnat_in2out_hash[bucket]);
117     vnet_buffer(b0)->vcgn_uii.bucket = bucket;
118
119     /* Prefetch the hash bucket */
120     CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD);
121 }
122
123 static inline void
124 stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
125 { /* nothing */ }
126
127
/* File-local cache-line constants: log2 of the line size, and the size (64). */
#define SPP_LOG2_CACHE_LINE_BYTES 6
#define SPP_CACHE_LINE_BYTES      (1 << SPP_LOG2_CACHE_LINE_BYTES)
130
131 static inline void
132 stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
133 {
134     vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
135     uword prefetch_target0, prefetch_target1;
136     u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket;
137   
138     /* read the hash bucket */
139     u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket
140                  = cnat_in2out_hash[bucket].next;
141
142     if (PREDICT_TRUE(db_index != EMPTY)) {
143         /*
144          * Prefetch database keys. We save space by not cache-line
145          * aligning the DB entries. We don't want to waste LSU
146          * bandwidth prefetching stuff we won't need.
147          */
148         prefetch_target0 = (uword)(cnat_main_db + db_index);
149         CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD);
150         /* Just beyond DB key #2 */
151         prefetch_target1 = prefetch_target0 +
152         STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports);
153         /* If the targets are in different lines, do the second prefetch */
154         if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) !=
155                       (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) {
156             CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD);
157         }
158     }
159 }
160
161 static inline void
162 stage4(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
163 {
164   cnat_main_db_entry_t *db;
165   vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
166   u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;
167
168   /*
169    * Note: if the search already failed (empty bucket),
170    * the answer is already in the pipeline context structure
171    */
172   if (PREDICT_TRUE(db_index != EMPTY)) {
173
174     /*
175      * Note: hash collisions suck. We can't easily prefetch around them.
176      * The first trip around the track will be fast. After that, maybe
177      * not so much...
178      */
179     do {
180       db = cnat_main_db + db_index;
181       if (PREDICT_TRUE(db->in2out_key.key64 ==
182                   vnet_buffer(b0)->vcgn_uii.key.key64)) {
183         break;
184       }
185       db_index = db->in2out_hash.next;
186     } while (db_index != EMPTY);
187
188     /* Stick the answer back into the pipeline context structure */
189     vnet_buffer(b0)->vcgn_uii.bucket = db_index;
190   }
191 }
192
193
194 static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node,
195                               u32 bi)
196 {
197     vlib_buffer_t *b0 = vlib_get_buffer (vm, bi);
198     u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;
199     spp_ctx_t *ctx = (spp_ctx_t *) &vnet_buffer(b0)->vcgn_uii;
200     int disposition = CNAT_V4_TCP_I2O_T;
201     int counter = CNAT_V4_TCP_I2O_PKT_T;
202
203     ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0);
204     u8   ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
205     tcp_hdr_type *tcp = (tcp_hdr_type *)((u8*)ip + ipv4_hdr_len);
206     vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_tcp_inside_input_node.index);
207     u32 node_counter_base_index = n->error_heap_index;
208     vlib_error_main_t * em = &vm->error_main;
209     cnat_session_entry_t *session_db = NULL;
210     cnat_main_db_entry_t *db = NULL;
211     cnat_key_t dest_info;
212     u32 window;
213     u8 scale;
214
215
216     INCREMENT_NODE_COUNTER(CNAT_V4_TCP_I2O_PKT_IN);
217
218     if (PLATFORM_HANDLE_TTL_DECREMENT) {
219         if (PREDICT_FALSE(ip->ttl <= 1)) {
220             /* Try to generate ICMP error msg, as TTL is <= 1 */
221
222             if (icmpv4_generate_with_throttling
223                     (ctx, ip, ctx->ru.rx.uidb_index)) {
224
225                 /* Generated ICMP */
226                 disposition = CNAT_REWRITE_OUTPUT;
227                 counter = CNAT_V4_TCP_I2O_TTL_GEN;
228             } else {
229                 /* Could not generated ICMP - drop the packet */
230                 disposition = CNAT_V4_TCP_I2O_D; 
231                 counter = CNAT_V4_TCP_I2O_TTL_GEN_DROP;
232             }
233             goto drop_pkt;
234         }
235     }
236
237     if (PREDICT_FALSE(db_index == EMPTY)) {
238     /* Deleted fragment code from here */
239             disposition = CNAT_V4_TCP_I2O_E;
240         counter = CNAT_V4_TCP_I2O_EXCEPTION;
241     } else {
242         db = cnat_main_db + db_index;
243
244         /* Handle destination sessions */
245         dest_info.k.port = clib_net_to_host_u16(tcp->dest_port);
246         dest_info.k.ipv4 = clib_net_to_host_u32(ip->dest_addr);
247
248         if(PREDICT_TRUE(!PLATFORM_DBL_SUPPORT)) {
249
250             /* No DBL support, so just update the destn and proceed */
251             db->dst_ipv4 = dest_info.k.ipv4;
252             db->dst_port = dest_info.k.port;
253             goto update_pkt;
254         }
255
256         if(PREDICT_FALSE(db->dst_ipv4 != dest_info.k.ipv4 ||
257             db->dst_port != dest_info.k.port)) {
258             if(PREDICT_TRUE(db->nsessions == 0)) {
259                 /* Should be a static entry
260                  * Note this session as the first session and log
261                  */
262                 cnat_add_dest_n_log(db, &dest_info);
263             } else if(PREDICT_FALSE(db->nsessions == 1)) {
264                 /* Destn is not same as in main db. Multiple session
265                  * scenario
266                  */
267                 dest_info.k.vrf = db->in2out_key.k.vrf;
268                 session_db = cnat_handle_1to2_session(db, &dest_info);
269                 if(PREDICT_FALSE(session_db == NULL)) {
270                     disposition = CNAT_V4_TCP_I2O_D;
271                     counter = CNAT_V4_TCP_I2O_SESSION_DROP;
272                     goto drop_pkt;
273                 }
274             } else { /* There are already multiple destinations */
275                 dest_info.k.vrf = db->in2out_key.k.vrf;
276                 /* If session already exists,
277                  * cnat_create_session_db_entry will return the existing db
278                  * else create a new db
279                  * If could not create, return NULL
280                  */
281                 session_db = cnat_create_session_db_entry(&dest_info,
282                         db, TRUE);
283                 if(PREDICT_FALSE(session_db == NULL)) {
284                     disposition = CNAT_V4_TCP_I2O_D;
285                     counter = CNAT_V4_TCP_I2O_SESSION_DROP;
286                     goto drop_pkt;
287                 }
288             }
289             if(PREDICT_TRUE(session_db != 0)) {
290                 /* Have to repeat the window size check for new destinations */
291                 window = (u32)clib_net_to_host_u16(tcp->window_size);
292                 window = window << session_db->scale;
293                 if(PREDICT_TRUE(!session_db->window)) {
294                     calculate_window_scale(tcp, &scale);
295                     session_db->scale       = scale;
296                     session_db->window      = window;
297                 } else if (PREDICT_FALSE(session_db->window < 
298                             window)) { 
299                     /* Update the db entry with window option from packet */
300                     session_db->window  = window;
301                 } else {
302                     /* Do nothing */
303                 }    
304                 session_db->tcp_seq_num = clib_net_to_host_u32(tcp->seq_num);
305                 session_db->ack_no      = clib_net_to_host_u32(tcp->ack_num);
306 #if DEBUG > 1
307                 printf("\n In2out SDB stages seq no = %u," 
308                         "   ack no = %u, window = %u\n",
309                         session_db->tcp_seq_num,
310                         session_db->ack_no,
311                         session_db->window);
312 #endif
313                     
314             }
315         } else {
316             //Update the seq no and ack no for subsequent communication
317             //after connection establishment
318             //No need to update window here. Window is already updated 
319             //during connection establishment
320             window = (u32)clib_net_to_host_u16(tcp->window_size);
321             window = window << db->scale;
322             if(PREDICT_FALSE(!ALG_ENABLED_DB(db))) {
323                 //This check is done since proto_data is part of union in main 
324                 //db entry
325                 db->proto_data.tcp_seq_chk.seq_no  = 
326                     clib_net_to_host_u32(tcp->seq_num);
327                 db->proto_data.tcp_seq_chk.ack_no  = 
328                     clib_net_to_host_u32(tcp->ack_num);
329             }                         
330             if (PREDICT_FALSE(db->diff_window < window)) { 
331                 /* Update the db entry with window option from packet */
332                 db->diff_window = window; 
333             }
334 #if DEBUG > 1
335             printf("\n In2out MainDB seq no = %u,"
336                     "\n ack no = %u\n",
337                     db->proto_data.tcp_seq_chk.seq_no,
338                     db->proto_data.tcp_seq_chk.ack_no);
339             printf("\n In2out MAINDB window = %u\n",
340                     db->diff_window);
341 #endif                  
342         }
343 update_pkt:
344
345         counter = CNAT_V4_TCP_I2O_PKT_T;
346         disposition = CNAT_V4_TCP_I2O_T;
347
348         /* NO FRAGMENT & ALG HANDLING. DELETING THE CODE */
349
350         if (PLATFORM_HANDLE_TTL_DECREMENT) {
351             /*
352              * Decrement TTL and update IPv4 checksum
353              */
354             ipv4_decr_ttl_n_calc_csum(ip);
355         }
356
357         tcp_in2out_nat_mss_n_checksum(ip, 
358                                       tcp, 
359                                       db->out2in_key.k.ipv4, 
360                                       db->out2in_key.k.port, 
361                                       db 
362                                       /*, db->in2out_key.k.vrf */);
363
364         /* update transaltion counters */
365         db->in2out_pkts++;
366         in2out_forwarding_count++;
367
368         /* update the timer for good mode, or evil mode dst_ip match */
369
370         if(PREDICT_FALSE(session_db != NULL)) {
371             V4_TCP_UPDATE_SESSION_DB_FLAG(session_db, tcp);
372             CNAT_DB_TIMEOUT_RST(session_db);
373         } else {
374             V4_TCP_UPDATE_SESSION_FLAG(db, tcp);
375             CNAT_DB_TIMEOUT_RST(db);
376         }
377     }
378
379 drop_pkt:
380
381     em->counters[node_counter_base_index + counter] += 1;
382     return  disposition;
383 }
384
385 #include <vnet/pipeline.h>
386
387 static uword cnat_ipv4_tcp_inside_input_node_fn (vlib_main_t * vm,
388                               vlib_node_runtime_t * node,
389                               vlib_frame_t * frame)
390 {
391     return dispatch_pipeline (vm, node, frame);
392 }
393
394
395 VLIB_REGISTER_NODE (cnat_ipv4_tcp_inside_input_node) = {
396   .function = cnat_ipv4_tcp_inside_input_node_fn,
397   .name = "vcgn-v4-tcp-i2o",
398   .vector_size = sizeof (u32),
399   .type = VLIB_NODE_TYPE_INTERNAL,
400
401   .n_errors = ARRAY_LEN(cnat_ipv4_tcp_inside_input_error_strings),
402   .error_strings = cnat_ipv4_tcp_inside_input_error_strings,
403
404   .n_next_nodes = CNAT_V4_TCP_I2O_NEXT,
405
406   /* edit / add dispositions here */
407   .next_nodes = {
408       [CNAT_V4_TCP_I2O_E] = "vcgn-v4-tcp-i2o-e",
409       [CNAT_V4_TCP_I2O_T] = "ip4-input",
410       [CNAT_V4_TCP_I2O_D] = "error-drop",
411   },
412 };
413
414 clib_error_t *cnat_ipv4_tcp_inside_input_init (vlib_main_t *vm)
415 {
416   cnat_ipv4_tcp_inside_input_main_t * mp = &cnat_ipv4_tcp_inside_input_main;
417
418   mp->vlib_main = vm;
419   mp->vnet_main = vnet_get_main();
420
421   return 0;
422 }
423
424 VLIB_INIT_FUNCTION (cnat_ipv4_tcp_inside_input_init);