Initial commit of vpp code.
[vpp.git] / vnet / vnet / vcgn / cnat_ipv4_icmp_error_outside_input.c
1 /* 
2  *---------------------------------------------------------------------------
3  * cnat_ipv4_icmp_error_outside_input.c - cnat_ipv4_icmp_error_outside_input node pipeline stage functions
4  *
5  * Copyright (c) 2008-2014 Cisco and/or its affiliates.
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at:
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *---------------------------------------------------------------------------
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vppinfra/error.h>
23 #include <vnet/buffer.h>
24
25 #include "cnat_ipv4_icmp.h"
26
/*
 * X-macro listing every error counter of this node as
 * _(symbol, description).  It is expanded once to build the counter
 * enum and once to build the matching description-string table.
 */
#define foreach_cnat_ipv4_icmp_e_outside_input_error                    \
  _(CNAT_V4_ICMP_E_O2I_T_PKT, "cnat v4 icmp_e o2i packet transmit")     \
  _(CNAT_V4_ICMP_E_O2I_D_PKT, "cnat v4 icmp_e o2i packet drop")         \
  _(CNAT_V4_ICMP_E_O2I_TTL_DROP, "cnat v4 icmp_e o2i ttl drop")
31
32 typedef enum {
33 #define _(sym,str) sym,
34   foreach_cnat_ipv4_icmp_e_outside_input_error 
35 #undef _
36   CNAT_IPV4_ICMP_E_OUTSIDE_INPUT_N_ERROR,
37 } cnat_ipv4_icmp_e_outside_input_t;
38
39 static char * cnat_ipv4_icmp_e_outside_input_error_strings[] = {
40 #define _(sym,string) string,
41   foreach_cnat_ipv4_icmp_e_outside_input_error
42 #undef _
43 };
44
45 typedef struct {
46   u32 cached_next_index;
47   /* $$$$ add data here */
48
49   /* convenience variables */
50   vlib_main_t * vlib_main;
51   vnet_main_t * vnet_main;
52 } cnat_ipv4_icmp_e_outside_input_main_t;
53
/*
 * Next-node dispositions returned by last_stage().  The values index
 * the .next_nodes table of the node registration.
 */
typedef enum {
    CNAT_V4_ICMP_E_O2I_T,       /* translated: forward the packet */
    CNAT_V4_ICMP_E_O2I_D,       /* no translation found: drop */
    CNAT_V4_ICMP_E_O2I_NEXT,    /* number of dispositions */
} cnat_ipv4_icmp_e_outside_input_next_t;
59
60 cnat_ipv4_icmp_e_outside_input_main_t cnat_ipv4_icmp_e_outside_input_main;
61 vlib_node_registration_t cnat_ipv4_icmp_e_outside_input_node;
62
63 #define NSTAGES 5
64
65 inline void swap_ip_dst_emip_src(ipv4_header *ip,
66                                  icmp_em_ip_info *icmp_info,
67                                  cnat_main_db_entry_t *db, u16 vrf)
68 {
69     icmp_v4_t   *icmp;
70     ipv4_header *em_ip;
71     u16 *em_port;
72     u32 old_ip;
73     u16 old_port;
74     u16 old_ip_checksum;
75
76     /*
77      * declear variable
78      */
79     CNAT_UPDATE_L3_CHECKSUM_DECLARE
80     CNAT_UPDATE_ICMP_ERR_CHECKSUM_DECLARE
81
82     /*
83      * fix inner layer ip & l4 checksum
84      */
85     em_ip = icmp_info->em_ip;
86     em_port = icmp_info->em_port;
87
88     CNAT_UPDATE_L3_CHECKSUM(((u16)(db->out2in_key.k.ipv4)),
89                                ((u16)(db->out2in_key.k.ipv4 >> 16)),
90                                (clib_net_to_host_u16(em_ip->checksum)),
91                                ((u16)(db->in2out_key.k.ipv4)),
92                                ((u16)(db->in2out_key.k.ipv4 >> 16)))
93
94     old_ip          = clib_net_to_host_u32(em_ip->src_addr);
95     old_port        = clib_net_to_host_u16(*em_port);
96     old_ip_checksum = clib_net_to_host_u16(em_ip->checksum);
97
98     em_ip->src_addr =
99         clib_host_to_net_u32(db->in2out_key.k.ipv4);
100     em_ip->checksum =
101         clib_host_to_net_u16(new_l3_c);
102     *em_port =
103         clib_host_to_net_u16(db->in2out_key.k.port);
104
105     /*
106      * fix outter layer ip & icmp checksum
107      */
108     icmp = icmp_info->icmp;
109     CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)),
110                                  ((u16)(old_ip >> 16)),
111                                  (old_port),
112                                  (old_ip_checksum),
113                                  (clib_net_to_host_u16(icmp->checksum)),
114                                  ((u16)(db->in2out_key.k.ipv4 & 0xffff)),
115                                  ((u16)(db->in2out_key.k.ipv4 >> 16)),
116                                  ((u16)(db->in2out_key.k.port)), 
117                                  ((u16)(new_l3_c)))
118
119     icmp->checksum =
120         clib_host_to_net_u16(new_icmp_c); 
121
122     old_ip = clib_net_to_host_u32(ip->dest_addr);
123
124     ip->dest_addr = 
125         clib_host_to_net_u32(db->in2out_key.k.ipv4);
126
127     CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)),
128                             ((u16)(old_ip >> 16)),
129                             (clib_net_to_host_u16(ip->checksum)),
130                             ((u16)(db->in2out_key.k.ipv4)),
131                             ((u16)(db->in2out_key.k.ipv4 >> 16)))
132     ip->checksum = 
133         clib_host_to_net_u16(new_l3_c);
134
135 #if 0
136     if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) {
137         /*
138          * fix inner layer ip & l4 checksum
139          */
140         em_snat_ip = icmp_info->em_ip;
141         em_snat_port = icmp_info->em_port;
142
143         old_ip          = spp_net_to_host_byte_order_32(&(em_snat_ip->dest_addr));
144         old_port        = spp_net_to_host_byte_order_16(em_snat_port);
145         old_ip_checksum = spp_net_to_host_byte_order_16(&(em_snat_ip->checksum));
146         direction = 1;
147         if(cnat_static_dest_db_get_translation(em_snat_ip->dest_addr, &postmap_ip, vrf, direction) ==  CNAT_SUCCESS) {
148             old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip);
149
150             CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip)),
151                                ((u16)(old_ip >> 16)),
152                                (spp_net_to_host_byte_order_16(&(em_snat_ip->checksum))),
153                                ((u16)(old_postmap_ip)),
154                                ((u16)(old_postmap_ip >> 16)))
155             em_snat_ip->dest_addr = postmap_ip;
156             em_snat_ip->checksum =
157                 spp_host_to_net_byte_order_16(new_l3_c);
158
159             /*
160              * fix outter layer ip & icmp checksum
161              */
162             icmp = icmp_info->icmp;
163             CNAT_UPDATE_ICMP_ERR_CHECKSUM(((u16)(old_ip & 0xFFFF)),
164                                  ((u16)(old_ip >> 16)),
165                                  (old_port),
166                                  (old_ip_checksum),
167                                  (spp_net_to_host_byte_order_16(&(icmp->checksum))),
168                                  ((u16)(old_postmap_ip & 0xffff)),
169                                  ((u16)(old_postmap_ip >> 16)),
170                                  ((u16)(old_port)), 
171                                  ((u16)(new_l3_c)))
172
173             icmp->checksum =
174                 spp_host_to_net_byte_order_16(new_icmp_c); 
175
176         }
177     }
178
179     if(is_static_dest_nat_enabled(vrf) == CNAT_SUCCESS) {
180         direction = 1;
181         if(cnat_static_dest_db_get_translation(ip->src_addr, &postmap_ip, vrf, direction) ==  CNAT_SUCCESS) {
182             CNAT_UPDATE_L3_CHECKSUM_DECLARE 
183                 
184             old_ip = spp_net_to_host_byte_order_32(&(ip->src_addr));
185             old_postmap_ip = spp_net_to_host_byte_order_32(&postmap_ip);
186
187             CNAT_UPDATE_L3_CHECKSUM(((u16)(old_ip & 0xFFFF)),
188                                     ((u16)(old_ip >> 16)),
189                                     (spp_net_to_host_byte_order_16(&(ip->checksum))),
190                                     ((u16)(old_postmap_ip & 0xFFFF)),
191                                     ((u16)(old_postmap_ip >> 16)))          
192             ip->checksum =
193                 spp_host_to_net_byte_order_16(new_l3_c);
194             ip->src_addr = postmap_ip;
195         }
196     }
197 #endif /* if 0 */
198 }
199
/*
 * Use the generic buffer metadata + first line of packet data prefetch
 * stage function from <vnet/pipeline.h> (included further down in this
 * file). This is usually a Good Idea.
 */
#define stage0 generic_stage0
205
206
207 static inline void
208 stage1(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
209 {
210     u64 a, b, c;
211     u32 bucket;
212     u8 *prefetch_target;
213
214     vlib_buffer_t * b0 = vlib_get_buffer (vm, buffer_index);
215     ipv4_header *ip = vlib_buffer_get_current (b0);
216     u8   ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
217     icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len);
218     ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */
219     u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2;
220   
221     u64 tmp = 0;
222     u32 protocol = CNAT_ICMP;
223
224     /* Check L4 header for embedded packet */
225     if (em_ip->protocol == TCP_PROT) {
226         tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len);
227         vnet_buffer(b0)->vcgn_uii.key.k.port = 
228             clib_net_to_host_u16(tcp->src_port);
229         protocol = CNAT_TCP;
230
231     } else if (em_ip->protocol == UDP_PROT) {
232         udp_hdr_type_t *udp = (udp_hdr_type_t *)((u8 *)em_ip + em_ip_hdr_len);
233         vnet_buffer(b0)->vcgn_uii.key.k.port = 
234             clib_net_to_host_u16(udp->src_port);
235         protocol = CNAT_UDP;
236
237     } else {
238         icmp_v4_t *icmp = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len);
239         vnet_buffer(b0)->vcgn_uii.key.k.port = 
240             clib_net_to_host_u16(icmp->identifier);
241
242         if (PREDICT_FALSE((icmp->type != ICMPV4_ECHOREPLY) &&
243                            (icmp->type != ICMPV4_ECHO))) {
244             /*
245              * Try to set invalid protocol for these cases, so that
246              * hash lookup does not return valid main_db.  This approach
247              * may optimize the regular cases with valid protocols
248              * as it avoids one more check for regular cases in stage3
249              */
250             protocol = CNAT_INVALID_PROTO;
251         } 
252     }
253
254     tmp = vnet_buffer(b0)->vcgn_uii.key.k.ipv4 =
255             clib_net_to_host_u32(em_ip->src_addr);
256
257     tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.port) << 32;
258
259     PLATFORM_CNAT_SET_RX_VRF(vnet_buffer(b0)->sw_if_index[VLIB_RX],
260                              vnet_buffer(b0)->vcgn_uii.key.k.vrf,
261                              protocol)
262     tmp |= ((u64)vnet_buffer(b0)->vcgn_uii.key.k.vrf) << 48;
263
264     CNAT_V4_GET_HASH(tmp, bucket, CNAT_MAIN_HASH_MASK)
265
266     prefetch_target = (u8 *)(&cnat_out2in_hash[bucket]);
267     vnet_buffer(b0)->vcgn_uii.bucket = bucket;
268
269     /* Prefetch the hash bucket */
270     CLIB_PREFETCH(prefetch_target, CLIB_CACHE_LINE_BYTES, LOAD);
271 }
272
273
274 #define SPP_LOG2_CACHE_LINE_BYTES 6
275 #define SPP_CACHE_LINE_BYTES (1 << SPP_LOG2_CACHE_LINE_BYTES)
276
277 static inline void
278 stage2(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
279 {
280     vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
281     uword prefetch_target0, prefetch_target1;
282     u32 bucket = vnet_buffer(b0)->vcgn_uii.bucket;
283   
284     /* read the hash bucket */
285     u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket
286                  = cnat_out2in_hash[bucket].next;
287
288     if (PREDICT_TRUE(db_index != EMPTY)) {
289         /*
290          * Prefetch database keys. We save space by not cache-line
291          * aligning the DB entries. We don't want to waste LSU
292          * bandwidth prefetching stuff we won't need.
293          */
294         prefetch_target0 = (uword)(cnat_main_db + db_index);
295         CLIB_PREFETCH((void*)prefetch_target0, CLIB_CACHE_LINE_BYTES, LOAD);
296         /* Just beyond DB key #2 */
297         prefetch_target1 = prefetch_target0 +
298         STRUCT_OFFSET_OF(cnat_main_db_entry_t, user_ports);
299         /* If the targets are in different lines, do the second prefetch */
300         if (PREDICT_FALSE((prefetch_target0 & ~(SPP_CACHE_LINE_BYTES-1)) !=
301                       (prefetch_target1 & ~(SPP_CACHE_LINE_BYTES-1)))) {
302             CLIB_PREFETCH((void *)prefetch_target1, CLIB_CACHE_LINE_BYTES, LOAD);
303         }
304     }
305 }
306
307
308 static inline void
309 stage3(vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
310 {
311   cnat_main_db_entry_t *db;
312   vlib_buffer_t * b0 = vlib_get_buffer(vm, buffer_index);
313   u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;
314
315   /*
316    * Note: if the search already failed (empty bucket),
317    * the answer is already in the pipeline context structure
318    */
319   if (PREDICT_TRUE(db_index != EMPTY)) {
320
321     /*
322      * Note: hash collisions suck. We can't easily prefetch around them.
323      * The first trip around the track will be fast. After that, maybe
324      * not so much...
325      */
326     do {
327       db = cnat_main_db + db_index;
328       if (PREDICT_TRUE(db->out2in_key.key64 ==
329                   vnet_buffer(b0)->vcgn_uii.key.key64)) {
330         break;
331       }
332       db_index = db->out2in_hash.next;
333     } while (db_index != EMPTY);
334
335     /* Stick the answer back into the pipeline context structure */
336     vnet_buffer(b0)->vcgn_uii.bucket = db_index;
337   }
338 }
339
340 static inline u32 last_stage (vlib_main_t *vm, vlib_node_runtime_t *node,
341                               u32 bi)
342 {
343     vlib_buffer_t *b0 = vlib_get_buffer (vm, bi);
344     u32 db_index = vnet_buffer(b0)->vcgn_uii.bucket;
345     int disposition = CNAT_V4_ICMP_E_O2I_T;
346     int counter = CNAT_V4_ICMP_E_O2I_T_PKT;
347
348     ipv4_header *ip = (ipv4_header *)vlib_buffer_get_current(b0);
349     u8   ipv4_hdr_len = (ip->version_hdr_len_words & 0xf) << 2;
350     icmp_v4_t *icmp = (icmp_v4_t *)((u8*)ip + ipv4_hdr_len);
351     ipv4_header *em_ip = (ipv4_header*)((u8*)icmp + 8); /* embedded pkt's v4 hdr */
352     u8 em_ip_hdr_len = (em_ip->version_hdr_len_words & 0xf) << 2;
353     vlib_node_t *n = vlib_get_node (vm, cnat_ipv4_icmp_e_outside_input_node.index);
354     u32 node_counter_base_index = n->error_heap_index;
355     vlib_error_main_t * em = &vm->error_main;
356     cnat_main_db_entry_t *db = NULL;
357     icmp_em_ip_info icmp_info;
358
359
360     if (PREDICT_TRUE(db_index != EMPTY)) {
361
362          icmp_info.em_ip = em_ip;
363          icmp_info.icmp = icmp;
364
365          /* Note: This could have been done in stage1 itself, 
366           * but we need to introduce one u16 * in vnet_buffer_opaque_t 
367           * Since this flow is expected to be very rare in actual 
368           * deployment scenario, we may afford to do these steps here 
369           * as well. Lets confirm during core review. */
370
371          if (em_ip->protocol == TCP_PROT) {
372              tcp_hdr_type *tcp = (tcp_hdr_type*)((u8 *)em_ip + em_ip_hdr_len);
373              icmp_info.em_port = &(tcp->src_port); 
374          } else if (em_ip->protocol == UDP_PROT) {
375              udp_hdr_type_t *udp = (udp_hdr_type_t *)
376                  ((u8 *)em_ip + em_ip_hdr_len);
377              icmp_info.em_port = &(udp->src_port); 
378          } else {
379              icmp_v4_t *icmp_inner = (icmp_v4_t*)((u8 *)em_ip + em_ip_hdr_len);
380              icmp_info.em_port = &(icmp_inner->identifier);
381          }
382
383          db = cnat_main_db + db_index;
384
385         if (PREDICT_FALSE(icmp_debug_flag)) {
386             printf("\nDUMPING ICMP PKT BEFORE\n");
387             print_icmp_pkt(ip);
388         }
389
390         if (PLATFORM_HANDLE_TTL_DECREMENT) {
391             /*
392              * Decrement TTL and update IPv4 checksum
393              */
394             ipv4_decr_ttl_n_calc_csum(ip);
395         }
396
397         swap_ip_dst_emip_src(ip, &icmp_info,
398                              db, db->in2out_key.k.vrf);
399
400         if (PREDICT_FALSE(icmp_debug_flag)) {
401             printf("\nDUMPING ICMP PKT AFTER\n");
402             print_icmp_pkt(ip);
403         }
404
405     } else {
406         disposition = CNAT_V4_ICMP_E_O2I_D;
407         counter = CNAT_V4_ICMP_E_O2I_D_PKT;
408     }
409
410     em->counters[node_counter_base_index + counter] += 1;
411     return  disposition;
412 }
413
414 #include <vnet/pipeline.h>
415
416 static uword cnat_ipv4_icmp_e_outside_input_node_fn (vlib_main_t * vm,
417                               vlib_node_runtime_t * node,
418                               vlib_frame_t * frame)
419 {
420     return dispatch_pipeline (vm, node, frame);
421 }
422
423
424 VLIB_REGISTER_NODE (cnat_ipv4_icmp_e_outside_input_node) = {
425   .function = cnat_ipv4_icmp_e_outside_input_node_fn,
426   .name = "vcgn-v4-icmp-e-o2i",
427   .vector_size = sizeof (u32),
428   .type = VLIB_NODE_TYPE_INTERNAL,
429
430   .n_errors = ARRAY_LEN(cnat_ipv4_icmp_e_outside_input_error_strings),
431   .error_strings = cnat_ipv4_icmp_e_outside_input_error_strings,
432
433   .n_next_nodes = CNAT_V4_ICMP_E_O2I_NEXT,
434
435   /* edit / add dispositions here */
436   .next_nodes = {
437       [CNAT_V4_ICMP_E_O2I_T] = "ip4-input",
438       [CNAT_V4_ICMP_E_O2I_D] = "error-drop",
439   },
440 };
441
442 clib_error_t *cnat_ipv4_icmp_e_outside_input_init (vlib_main_t *vm)
443 {
444   cnat_ipv4_icmp_e_outside_input_main_t * mp = &cnat_ipv4_icmp_e_outside_input_main;
445
446   mp->vlib_main = vm;
447   mp->vnet_main = vnet_get_main();
448
449   return 0;
450 }
451
452 VLIB_INIT_FUNCTION (cnat_ipv4_icmp_e_outside_input_init);