MAP: Ensure fragmented packets get from ip4_map to ip4_map_reass.
[vpp.git] / vnet / vnet / map / ip4_map.c
index cf53ef4..adbc72e 100644 (file)
@@ -27,8 +27,10 @@ enum ip4_map_next_e {
 #ifdef MAP_SKIP_IP6_LOOKUP
   IP4_MAP_NEXT_IP6_REWRITE,
 #endif
-  IP4_MAP_NEXT_FRAGMENT,
+  IP4_MAP_NEXT_IP4_FRAGMENT,
+  IP4_MAP_NEXT_IP6_FRAGMENT,
   IP4_MAP_NEXT_REASS,
+  IP4_MAP_NEXT_ICMP_ERROR,
   IP4_MAP_NEXT_DROP,
   IP4_MAP_N_NEXT,
 };
@@ -76,7 +78,7 @@ ip4_map_get_port (ip4_header_t *ip, map_dir_e dir)
     icmp46_header_t *icmp = (void *)(ip + 1);
     if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) {
       return *((u16 *)(icmp + 1));
-    } else if (clib_net_to_host_u16(ip->length) >= 64) { // IP + ICMP + IP + L4 header
+    } else if (clib_net_to_host_u16(ip->length) >= 56) { // IP + ICMP + IP + L4 header
       ip4_header_t *icmp_ip = (ip4_header_t *)(icmp + 2);
       if (PREDICT_TRUE((icmp_ip->protocol == IP_PROTOCOL_TCP) ||
                       (icmp_ip->protocol == IP_PROTOCOL_UDP))) {
@@ -98,7 +100,7 @@ ip4_map_port_and_security_check (map_domain_t *d, ip4_header_t *ip, u32 *next, u
   u16 port = 0;
 
   if (d->psid_length > 0) {
-    if (!ip4_is_fragment(ip)) {
+    if (ip4_get_fragment_offset(ip) == 0) {
       if (PREDICT_FALSE((ip->ip_version_and_header_length != 0x45) || clib_host_to_net_u16(ip->length) < 28)) {
        return 0;
       }
@@ -108,6 +110,7 @@ ip4_map_port_and_security_check (map_domain_t *d, ip4_header_t *ip, u32 *next, u
        if ((d->psid_offset > 0) && (clib_net_to_host_u16(port) < (0x1 << (16 - d->psid_offset)))) {
          *error = MAP_ERROR_ENCAP_SEC_CHECK;
        } else {
+         if (ip4_get_fragment_more(ip)) *next = IP4_MAP_NEXT_REASS;
          return (port);
        }
       } else {
@@ -130,7 +133,7 @@ ip4_map_vtcfl (ip4_header_t *ip4, vlib_buffer_t *p)
   u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
   u32 vtcfl = 0x6 << 28;
   vtcfl |= tc << 20;
-  vtcfl |= vnet_buffer(p)->ip.flow_hash && 0x000fffff;
+  vtcfl |= vnet_buffer(p)->ip.flow_hash & 0x000fffff;
 
   return (clib_host_to_net_u32(vtcfl));
 }
@@ -155,6 +158,49 @@ ip4_map_ip6_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip)
   return (false);
 }
 
+/* ip4_map_decrement_ttl: decrement ip->ttl and adjust ip->checksum to match.
+ * Sets *error to IP4_ERROR_TIME_EXPIRED when the TTL reaches zero; otherwise *error keeps its value.
+ * NOTE(review): callers compare *error with MAP_ERROR_* codes -- confirm an IP4_ERROR_* value is intended. */
+static inline void
+ip4_map_decrement_ttl (ip4_header_t *ip, u8 *error)
+{
+  i32 ttl = ip->ttl;
+
+  /* The input node should already have rejected packets with ttl 0. */
+  ASSERT (ip->ttl > 0);
+
+  u32 checksum = ip->checksum + clib_host_to_net_u16(0x0100); /* compensates for the TTL byte dropping by one */
+  checksum += checksum >= 0xffff; /* add 1 when the 32-bit sum has reached 0xffff or more */
+  ip->checksum = checksum;
+  ttl -= 1;
+  ip->ttl = ttl;
+  *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
+
+  /* Check the adjusted checksum against a full recomputation over the header. */
+  ASSERT (ip->checksum == ip4_header_checksum(ip));
+}
+
+/* Pick the next node for a packet the caller found larger than mtu; *error is written only on the DF path. */
+static u32
+ip4_map_fragment (vlib_buffer_t *b, u16 mtu, bool df, u8 *error)
+{
+  map_main_t *cfg = &map_main;
+
+  if (cfg->frag_inner) { /* frag_inner set: DF is not consulted on this path */
+    ip_frag_set_vnet_buffer(b, sizeof(ip6_header_t), mtu, IP4_FRAG_NEXT_IP6_LOOKUP, IP_FRAG_FLAG_IP6_HEADER);
+    return IP4_MAP_NEXT_IP4_FRAGMENT;
+  }
+  if (df && !cfg->frag_ignore_df) { /* DF set and not overridden: hand off to the ICMP error node */
+    icmp4_error_set_vnet_buffer(b, ICMP4_destination_unreachable,
+                               ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, mtu);
+    vlib_buffer_advance(b, sizeof(ip6_header_t)); /* step past the IPv6 header the caller prepended */
+    *error = MAP_ERROR_DF_SET;
+    return IP4_MAP_NEXT_ICMP_ERROR;
+  }
+  ip_frag_set_vnet_buffer(b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, IP_FRAG_FLAG_IP6_HEADER);
+  return IP4_MAP_NEXT_IP6_FRAGMENT;
+}
+
 /*
  * ip4_map
  */
@@ -176,7 +222,7 @@ ip4_map (vlib_main_t *vm,
     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
 
     /* Dual loop */
-    while (n_left_from > 4 && n_left_to_next > 2) {
+    while (n_left_from >= 4 && n_left_to_next >= 2) {
       u32 pi0, pi1;
       vlib_buffer_t *p0, *p1;
       map_domain_t *d0, *d1;
@@ -212,8 +258,6 @@ ip4_map (vlib_main_t *vm,
       p1 = vlib_get_buffer(vm, pi1);
       ip40 = vlib_buffer_get_current(p0);
       ip41 = vlib_buffer_get_current(p1);
-      p0->current_length = clib_net_to_host_u16(ip40->length);
-      p1->current_length = clib_net_to_host_u16(ip41->length);
       d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0);
       d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], &map_domain_index1);
       ASSERT(d0);
@@ -225,6 +269,12 @@ ip4_map (vlib_main_t *vm,
       port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0);
       port1 = ip4_map_port_and_security_check(d1, ip41, &next1, &error1);
 
+      /* Decrement IPv4 TTL */
+      ip4_map_decrement_ttl(ip40, &error0);
+      ip4_map_decrement_ttl(ip41, &error1);
+      bool df0 = ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT);
+      bool df1 = ip41->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT);
+
       /* MAP calc */
       u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32);
       u32 da41 = clib_net_to_host_u32(ip41->dst_address.as_u32);
@@ -234,8 +284,10 @@ ip4_map (vlib_main_t *vm,
       u64 dal61 = map_get_pfx(d1, da41, dp41);
       u64 dar60 = map_get_sfx(d0, da40, dp40);
       u64 dar61 = map_get_sfx(d1, da41, dp41);
-      if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_UNKNOWN;
-      if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_UNKNOWN;
+      if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE && next0 != IP4_MAP_NEXT_REASS)
+       error0 = MAP_ERROR_NO_BINDING;
+      if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE && next1 != IP4_MAP_NEXT_REASS)
+       error1 = MAP_ERROR_NO_BINDING; /* packet 1: MAP prefix and suffix both zero, so flag packet 1's own error slot */
 
       /* construct ipv6 header */
       vlib_buffer_advance(p0, - sizeof(ip6_header_t));
@@ -266,11 +318,7 @@ ip4_map (vlib_main_t *vm,
        */
       if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
        if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) {
-         vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0);
-         vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
-         vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
-         vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
-         next0 = IP4_MAP_NEXT_FRAGMENT;
+         next0 = ip4_map_fragment(p0, d0->mtu, df0, &error0);
        } else {
          next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0;
          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1,
@@ -286,11 +334,7 @@ ip4_map (vlib_main_t *vm,
        */
       if (PREDICT_TRUE(error1 == MAP_ERROR_NONE)) {
        if (PREDICT_FALSE(d1->mtu && (clib_net_to_host_u16(ip6h1->payload_length) + sizeof(*ip6h1) > d1->mtu))) {
-         vnet_buffer(p1)->ip_frag.header_offset = sizeof(*ip6h1);
-         vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
-         vnet_buffer(p1)->ip_frag.mtu = d1->mtu;
-         vnet_buffer(p1)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
-         next1 = IP4_MAP_NEXT_FRAGMENT;
+         next1 = ip4_map_fragment(p1, d1->mtu, df1, &error1);
        } else {
          next1 = ip4_map_ip6_lookup_bypass(p1, ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1;
          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index1, 1,
@@ -336,7 +380,6 @@ ip4_map (vlib_main_t *vm,
 
       p0 = vlib_get_buffer(vm, pi0);
       ip40 = vlib_buffer_get_current(p0);
-      p0->current_length = clib_net_to_host_u16(ip40->length);
       d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0);
       ASSERT(d0);
 
@@ -345,12 +388,17 @@ ip4_map (vlib_main_t *vm,
        */
       port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0);
 
+      /* Decrement IPv4 TTL */
+      ip4_map_decrement_ttl(ip40, &error0);
+      bool df0 = ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT);
+
       /* MAP calc */
       u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32);
       u16 dp40 = clib_net_to_host_u16(port0);
       u64 dal60 = map_get_pfx(d0, da40, dp40);
       u64 dar60 = map_get_sfx(d0, da40, dp40);
-      if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_UNKNOWN;
+      if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE && next0 != IP4_MAP_NEXT_REASS)
+       error0 = MAP_ERROR_NO_BINDING;
 
       /* construct ipv6 header */
       vlib_buffer_advance(p0, - (sizeof(ip6_header_t)));
@@ -371,11 +419,7 @@ ip4_map (vlib_main_t *vm,
        */
       if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
        if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) {
-         vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0);
-         vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
-         vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
-         vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
-         next0 = IP4_MAP_NEXT_FRAGMENT;
+         next0 = ip4_map_fragment(p0, d0->mtu, df0, &error0);
        } else {
          next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0;
          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1,
@@ -566,8 +610,10 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
 #ifdef MAP_SKIP_IP6_LOOKUP
     [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite",
 #endif
-    [IP4_MAP_NEXT_FRAGMENT] = "ip4-frag",
+    [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+    [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
     [IP4_MAP_NEXT_REASS] = "ip4-map-reass",
+    [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [IP4_MAP_NEXT_DROP] = "error-drop",
   },
 };