X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fmap%2Fip4_map_t.c;h=e9882e7b2ee47a0ca7a5bbff24f7d0bd716e45b9;hb=f145c15631ba62e798395499f83a2f8a91ae83c7;hp=3d52ba4f3eab2635263133daf91aa7e241d3a8b7;hpb=fccd1b2b694146506e0efdeee6c23674550fc5d5;p=vpp.git diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index 3d52ba4f3ea..e9882e7b2ee 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -17,13 +17,12 @@ #include #include -#define IP4_MAP_T_DUAL_LOOP 1 - typedef enum { IP4_MAPT_NEXT_MAPT_TCP_UDP, IP4_MAPT_NEXT_MAPT_ICMP, IP4_MAPT_NEXT_MAPT_FRAGMENTED, + IP4_MAPT_NEXT_ICMP_ERROR, IP4_MAPT_NEXT_DROP, IP4_MAPT_N_NEXT } ip4_mapt_next_t; @@ -31,6 +30,7 @@ typedef enum typedef enum { IP4_MAPT_ICMP_NEXT_IP6_LOOKUP, + IP4_MAPT_ICMP_NEXT_IP6_REWRITE, IP4_MAPT_ICMP_NEXT_IP6_FRAG, IP4_MAPT_ICMP_NEXT_DROP, IP4_MAPT_ICMP_N_NEXT @@ -39,6 +39,7 @@ typedef enum typedef enum { IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP, + IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE, IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG, IP4_MAPT_TCP_UDP_NEXT_DROP, IP4_MAPT_TCP_UDP_N_NEXT @@ -47,6 +48,7 @@ typedef enum typedef enum { IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP, + IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE, IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG, IP4_MAPT_FRAGMENTED_NEXT_DROP, IP4_MAPT_FRAGMENTED_N_NEXT @@ -64,41 +66,6 @@ typedef CLIB_PACKED (struct { }) ip4_mapt_pseudo_header_t; /* *INDENT-ON* */ - -static_always_inline int -ip4_map_fragment_cache (ip4_header_t * ip4, u16 port) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - if (r) - r->port = port; - - map_ip4_reass_unlock (); - return !r; -} - -static_always_inline i32 -ip4_map_fragment_get_port (ip4_header_t * ip4) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - i32 ret = r ? r->port : -1; - map_ip4_reass_unlock (); - return ret; -} - typedef struct { map_domain_t *d; @@ -106,7 +73,8 @@ typedef struct } icmp_to_icmp6_ctx_t; static int -ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) +ip4_to_ip6_set_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) { icmp_to_icmp6_ctx_t *ctx = arg; @@ -120,8 +88,8 @@ ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) } static int -ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) +ip4_to_ip6_set_inner_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) { icmp_to_icmp6_ctx_t *ctx = arg; @@ -205,9 +173,14 @@ ip4_map_t_icmp (vlib_main_t * vm, if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; } + else + { + next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ? + IP4_MAPT_ICMP_NEXT_IP6_REWRITE : next0; + } err0: if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { @@ -231,10 +204,44 @@ ip4_map_t_icmp (vlib_main_t * vm, return frame->n_vectors; } -static int -ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx) +/* + * Translate fragmented IPv4 UDP/TCP packet to IPv6. + */ +always_inline int +map_ip4_to_ip6_fragmented (vlib_buffer_t * p, + ip4_mapt_pseudo_header_t * pheader) { - ip4_mapt_pseudo_header_t *pheader = ctx; + ip4_header_t *ip4; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + + ip4 = vlib_buffer_get_current (p); + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*frag) - + sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + + //We know that the protocol was one of ICMP, TCP or UDP + //because the first fragment was found and cached + frag->next_hdr = + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol; + frag->identification = frag_id_4to6 (ip4->fragment_id); + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), + clib_net_to_host_u16 + (ip4->flags_and_fragment_offset) & + IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4) + sizeof (*frag)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; @@ -279,7 +286,7 @@ ip4_map_t_fragmented (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0)) + if (map_ip4_to_ip6_fragmented (p0, pheader0)) { p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP; @@ -290,9 +297,14 @@ ip4_map_t_fragmented (vlib_main_t * vm, { vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; + IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; } + else + { + next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ? + IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE : next0; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, @@ -304,6 +316,110 @@ ip4_map_t_fragmented (vlib_main_t * vm, return frame->n_vectors; } +/* + * Translate IPv4 UDP/TCP packet to IPv6. + */ +always_inline int +map_ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_mapt_pseudo_header_t * pheader) +{ + map_main_t *mm = &map_main; + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum; + ip6_frag_hdr_t *frag; + u32 frag_id; + ip4_address_t old_src, old_dst; + + ip4 = vlib_buffer_get_current (p); + + if (ip4->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp = ip4_next_header (ip4); + checksum = &udp->checksum; + + /* + * UDP checksum is optional over IPv4 but mandatory for IPv6 We + * do not check udp->length sanity but use our safe computed + * value instead + */ + if (PREDICT_FALSE (!*checksum)) + { + u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + tcp_header_t *tcp = ip4_next_header (ip4); + if (mm->tcp_mss > 0) + { + csum = tcp->checksum; + map_mss_clamping (tcp, &csum, mm->tcp_mss); + tcp->checksum = ip_csum_fold (csum); + } + checksum = &tcp->checksum; + } + + old_src.as_u32 = ip4->src_address.as_u32; + old_dst.as_u32 = ip4->dst_address.as_u32; + + /* Deal with fragmented packets */ + if (PREDICT_FALSE (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; + ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; + ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0]; + ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1]; + + csum = ip_csum_sub_even (*checksum, old_src.as_u32); + csum = ip_csum_sub_even (csum, old_dst.as_u32); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + + return 0; +} + static uword ip4_map_t_tcp_udp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -320,72 +436,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); -#ifdef IP4_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip4_mapt_pseudo_header_t *pheader0, *pheader1; - ip4_mapt_tcp_udp_next_t next0, next1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; - next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - //Accessing pseudo header - pheader0 = vlib_buffer_get_current (p0); - pheader1 = vlib_buffer_get_current (p1); - vlib_buffer_advance (p0, sizeof (*pheader0)); - vlib_buffer_advance (p1, sizeof (*pheader1)); - - if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) - { - p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - } - - if (ip4_to_ip6_tcp_udp (p1, ip4_to_ip6_set_cb, pheader1)) - { - p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next1 = IP4_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, pi0, pi1, - next0, next1); - } -#endif - while (n_left_from > 0 && n_left_to_next > 0) { u32 pi0; @@ -406,7 +456,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) + if (map_ip4_to_ip6_tcp_udp (p0, pheader0)) { p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; @@ -418,9 +468,14 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, //Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; + IP_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; } + else + { + next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ? + IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE : next0; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, @@ -435,7 +490,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, static_always_inline void ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0, - u8 * error0, ip4_mapt_next_t * next0) + u8 * error0, ip4_mapt_next_t * next0, u16 l4_dst_port) { if (PREDICT_FALSE (ip4_get_fragment_offset (ip40))) { @@ -446,7 +501,7 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, } else { - *dst_port0 = ip4_map_fragment_get_port (ip40); + *dst_port0 = l4_dst_port; *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0; } } @@ -455,14 +510,14 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, vnet_buffer (p0)->map_t.checksum_offset = 36; *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2)); + *dst_port0 = l4_dst_port; } else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP)) { vnet_buffer (p0)->map_t.checksum_offset = 26; *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0; - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2)); + *dst_port0 = l4_dst_port; } else if (ip40->protocol == IP_PROTOCOL_ICMP) { @@ -474,7 +529,7 @@ ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, || ((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->code == ICMP4_echo_request) - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6)); + *dst_port0 = l4_dst_port; } else { @@ -498,142 +553,13 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); -#ifdef IP4_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip4_header_t *ip40, *ip41; - map_domain_t *d0, *d1; - ip4_mapt_next_t next0 = 0, next1 = 0; - u16 ip4_len0, ip4_len1; - u8 error0, error1; - i32 dst_port0, dst_port1; - ip4_mapt_pseudo_header_t *pheader0, *pheader1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - error0 = MAP_ERROR_NONE; - error1 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - ip4_len0 = clib_host_to_net_u16 (ip40->length); - ip4_len1 = clib_host_to_net_u16 (ip41->length); - - if (PREDICT_FALSE (p0->current_length < ip4_len0 || - ip40->ip_version_and_header_length != 0x45)) - { - error0 = MAP_ERROR_UNKNOWN; - next0 = IP4_MAPT_NEXT_DROP; - } - - if (PREDICT_FALSE (p1->current_length < ip4_len1 || - ip41->ip_version_and_header_length != 0x45)) - { - error1 = MAP_ERROR_UNKNOWN; - next1 = IP4_MAPT_NEXT_DROP; - } - - vnet_buffer (p0)->map_t.map_domain_index = - vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); - vnet_buffer (p1)->map_t.map_domain_index = - vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - d1 = ip4_map_get_domain (vnet_buffer (p1)->map_t.map_domain_index); - - vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; - vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; - - dst_port0 = -1; - dst_port1 = -1; - - ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0, - &next0); - ip4_map_t_classify (p1, d1, ip41, ip4_len1, &dst_port1, &error1, - &next1); - - //Add MAP-T pseudo header in front of the packet - vlib_buffer_advance (p0, -sizeof (*pheader0)); - vlib_buffer_advance (p1, -sizeof (*pheader1)); - pheader0 = vlib_buffer_get_current (p0); - pheader1 = vlib_buffer_get_current (p1); - - //Save addresses within the packet - ip4_map_t_embedded_address (d0, &pheader0->saddr, - &ip40->src_address); - ip4_map_t_embedded_address (d1, &pheader1->saddr, - &ip41->src_address); - pheader0->daddr.as_u64[0] = - map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0); - pheader0->daddr.as_u64[1] = - map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0); - pheader1->daddr.as_u64[0] = - map_get_pfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1); - pheader1->daddr.as_u64[1] = - map_get_sfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1); - - if (PREDICT_FALSE - (ip4_is_first_fragment (ip40) && (dst_port0 != -1) - && (d0->ea_bits_len != 0 || !d0->rules) - && ip4_map_fragment_cache (ip40, dst_port0))) - { - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - - if (PREDICT_FALSE - (ip4_is_first_fragment (ip41) && (dst_port1 != -1) - && (d1->ea_bits_len != 0 || !d1->rules) - && ip4_map_fragment_cache (ip41, dst_port1))) - { - error1 = MAP_ERROR_FRAGMENT_MEMORY; - } - - if (PREDICT_TRUE - (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip40->length)); - } - - if (PREDICT_TRUE - (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p1)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip41->length)); - } - - next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; - next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1; - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } -#endif - while (n_left_from > 0 && n_left_to_next > 0) { u32 pi0; vlib_buffer_t *p0; ip4_header_t *ip40; map_domain_t *d0; - ip4_mapt_next_t next0; + ip4_mapt_next_t next0 = 0; u16 ip4_len0; u8 error0; i32 dst_port0; @@ -647,24 +573,64 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) error0 = MAP_ERROR_NONE; p0 = vlib_get_buffer (vm, pi0); + + u16 l4_dst_port = vnet_buffer (p0)->ip.reass.l4_dst_port; + ip40 = vlib_buffer_get_current (p0); ip4_len0 = clib_host_to_net_u16 (ip40->length); if (PREDICT_FALSE (p0->current_length < ip4_len0 || ip40->ip_version_and_header_length != 0x45)) { error0 = MAP_ERROR_UNKNOWN; - next0 = IP4_MAPT_NEXT_DROP; } - vnet_buffer (p0)->map_t.map_domain_index = - vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); + d0 = ip4_map_get_domain (&ip40->dst_address, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); - vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + if (!d0) + { /* Guess it wasn't for us */ + vnet_feature_next (&next0, p0); + goto exit; + } dst_port0 = -1; + + if (PREDICT_FALSE (ip40->ttl == 1)) + { + icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + p0->error = error_node->errors[MAP_ERROR_TIME_EXCEEDED]; + next0 = IP4_MAPT_NEXT_ICMP_ERROR; + goto trace; + } + + bool df0 = + ip40->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + + if (PREDICT_FALSE + (df0 && !map_main.frag_ignore_df + && + ((ip4_len0 + + (sizeof (ip6_header_t) - sizeof (ip4_header_t))) > + vnet_buffer (p0)->map_t.mtu))) + { + icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer (p0)->map_t.mtu - + (sizeof (ip6_header_t) - + sizeof (ip4_header_t))); + p0->error = error_node->errors[MAP_ERROR_DF_SET]; + next0 = IP4_MAPT_NEXT_ICMP_ERROR; + goto trace; + } + ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0, - &next0); + &next0, l4_dst_port); /* Verify that port is not among the well-known ports */ if ((d0->psid_length > 0 && d0->psid_offset > 0) @@ -686,17 +652,6 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) pheader0->daddr.as_u64[1] = map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0); - //It is important to cache at this stage because the result might be necessary - //for packets within the same vector. - //Actually, this approach even provides some limited out-of-order fragments support - if (PREDICT_FALSE - (ip4_is_first_fragment (ip40) && (dst_port0 != -1) - && (d0->ea_bits_len != 0 || !d0->rules) - && ip4_map_fragment_cache (ip40, dst_port0))) - { - error0 = MAP_ERROR_UNKNOWN; - } - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { @@ -710,6 +665,12 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; p0->error = error_node->errors[error0]; + trace: + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_add_trace (vm, node, p0, d0 - map_main.domains, dst_port0); + } + exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -726,6 +687,13 @@ static char *map_t_error_strings[] = { }; /* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip4_map_t_feature, static) = { + .arc_name = "ip4-unicast", + .node_name = "ip4-map-t", + .runs_before = VNET_FEATURES ("ip4-flow-classify"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; + VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { .function = ip4_map_t_fragmented, .name = "ip4-map-t-fragmented", @@ -739,6 +707,7 @@ VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT, .next_nodes = { [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE] = "ip6-load-balance", [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", }, @@ -759,6 +728,7 @@ VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { .n_next_nodes = IP4_MAPT_ICMP_N_NEXT, .next_nodes = { [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_ICMP_NEXT_IP6_REWRITE] = "ip6-load-balance", [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop", }, @@ -779,6 +749,7 @@ VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT, .next_nodes = { [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE] = "ip6-load-balance", [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", }, @@ -801,6 +772,7 @@ VLIB_REGISTER_NODE(ip4_map_t_node) = { [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp", [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp", [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented", + [IP4_MAPT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_MAPT_NEXT_DROP] = "error-drop", }, };