X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fmap%2Fmap.c;h=b2eefee9bb22b692e180611ee34a3755c176e9b5;hb=e31d956;hp=a2d28118ff4a22f84efe30aa2f2f122001524ac3;hpb=a9a20e7f69f4a91a4d5267ab5ce14125bdc7d6c6;p=vpp.git diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index a2d28118ff4..b2eefee9bb2 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -19,27 +19,11 @@ #include #include #include +#include #include "map.h" -#ifdef __SSE4_2__ -static inline u32 -crc_u32 (u32 data, u32 value) -{ - __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] - "rm" (data)); - return value; -} -#else -#include - -static inline u32 -crc_u32 (u32 data, u32 value) -{ - u64 tmp = ((u64) data << 32) | (u64) value; - return (u32) clib_xxhash (tmp); -} -#endif +map_main_t map_main; /* * This code supports the following MAP modes: @@ -75,91 +59,6 @@ crc_u32 (u32 data, u32 value) */ -i32 -ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len) -{ - //TODO: use buffer length - if (ip->ip_version_and_header_length != 0x45 || - ip4_get_fragment_offset (ip)) - return -1; - - if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || - (ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (ip + 1); - return (dir == MAP_SENDER) ? udp->src_port : udp->dst_port; - } - else if (ip->protocol == IP_PROTOCOL_ICMP) - { - icmp46_header_t *icmp = (void *) (ip + 1); - if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) - { - return *((u16 *) (icmp + 1)); - } - else if (clib_net_to_host_u16 (ip->length) >= 64) - { - ip = (ip4_header_t *) (icmp + 2); - if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || - (ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (ip + 1); - return (dir == MAP_SENDER) ? 
udp->dst_port : udp->src_port; - } - else if (ip->protocol == IP_PROTOCOL_ICMP) - { - icmp46_header_t *icmp = (void *) (ip + 1); - if (icmp->type == ICMP4_echo_request || - icmp->type == ICMP4_echo_reply) - { - return *((u16 *) (icmp + 1)); - } - } - } - } - return -1; -} - -i32 -ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len) -{ - u8 l4_protocol; - u16 l4_offset; - u16 frag_offset; - u8 *l4; - - if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - //TODO: Use buffer length - - if (frag_offset && - ip6_frag_hdr_offset (((ip6_frag_hdr_t *) - u8_ptr_add (ip6, frag_offset)))) - return -1; //Can't deal with non-first fragment for now - - l4 = u8_ptr_add (ip6, l4_offset); - if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) - { - return (dir == - MAP_SENDER) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t - *) - (l4))->dst_port; - } - else if (l4_protocol == IP_PROTOCOL_ICMP6) - { - icmp46_header_t *icmp = (icmp46_header_t *) (l4); - if (icmp->type == ICMP6_echo_request) - { - return (dir == MAP_SENDER) ? ((u16 *) (icmp))[2] : -1; - } - else if (icmp->type == ICMP6_echo_reply) - { - return (dir == MAP_SENDER) ? 
-1 : ((u16 *) (icmp))[2]; - } - } - return -1; -} - int map_create_domain (ip4_address_t * ip4_prefix, @@ -176,7 +75,6 @@ map_create_domain (ip4_address_t * ip4_prefix, map_main_t *mm = &map_main; dpo_id_t dpo_v4 = DPO_INVALID; dpo_id_t dpo_v6 = DPO_INVALID; - fib_node_index_t fei; map_domain_t *d; /* Sanity check on the src prefix length */ @@ -187,6 +85,12 @@ map_create_domain (ip4_address_t * ip4_prefix, clib_warning ("MAP-T only supports ip6_src_len = 96 for now."); return -1; } + if ((flags & MAP_DOMAIN_RFC6052) && ip6_prefix_len != 96) + { + clib_warning ("RFC6052 translation only supports ip6_prefix_len = " + "96 for now"); + return -1; + } } else { @@ -267,57 +171,28 @@ map_create_domain (ip4_address_t * ip4_prefix, dpo_reset (&dpo_v4); /* - * Multiple MAP domains may share same source IPv6 TEP. - * In this case the route will exist and be MAP sourced. - * Find the adj (if any) already contributed and modify it + * construct a DPO to use the v6 domain */ - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = d->ip6_src_len, - .fp_addr = { - .ip6 = d->ip6_src, - } - , - }; - fei = fib_table_lookup_exact_match (0, &pfx6); - - if (FIB_NODE_INDEX_INVALID != fei) - { - dpo_id_t dpo = DPO_INVALID; - - if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_MAP, &dpo)) - { - /* - * modify the existing MAP to indicate it's shared - * skip to route add. - */ - const dpo_id_t *md_dpo; - map_dpo_t *md; - - ASSERT (DPO_LOAD_BALANCE == dpo.dpoi_type); - - md_dpo = load_balance_get_bucket (dpo.dpoi_index, 0); - md = map_dpo_get (md_dpo->dpoi_index); - - md->md_domain = ~0; - dpo_copy (&dpo_v6, md_dpo); - dpo_reset (&dpo); - - goto route_add; - } - } - if (d->flags & MAP_DOMAIN_TRANSLATION) map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); else map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); -route_add: /* + * Multiple MAP domains may share same source IPv6 TEP. Which is just dandy. + * We are not tracking the sharing. 
So a v4 lookup to find the correct + * domain post decap/translate is always done + * * Create ip6 route. This is a reference counted add. If the prefix * already exists and is MAP sourced, it is now MAP source n+1 times * and will need to be removed n+1 times. */ + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = d->ip6_src_len, + .fp_addr.ip6 = d->ip6_src, + }; + fib_table_entry_special_dpo_add (0, &pfx6, FIB_SOURCE_MAP, FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); @@ -437,23 +312,165 @@ map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, } #ifdef MAP_SKIP_IP6_LOOKUP +/** + * Pre-resolved per-protocol global next-hops + */ +map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; + static void -map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6) +map_pre_resolve_init (map_main_pre_resolved_t * pr) { - map_main_t *mm = &map_main; - ip6_main_t *im6 = &ip6_main; + pr->fei = FIB_NODE_INDEX_INVALID; + fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E); +} - if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) +static u8 * +format_map_pre_resolve (u8 * s, va_list * ap) +{ + map_main_pre_resolved_t *pr = va_arg (*ap, map_main_pre_resolved_t *); + + if (FIB_NODE_INDEX_INVALID != pr->fei) { - // FIXME NOT an ADJ - mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6); - clib_warning ("FIB lookup results in: %u", mm->adj6_index); + fib_prefix_t pfx; + + fib_entry_get_prefix (pr->fei, &pfx); + + return (format (s, "%U (%u)", + format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY, + pr->dpo.dpoi_index)); } - if (ip4->as_u32 != 0) + else { - // FIXME NOT an ADJ - mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4); - clib_warning ("FIB lookup results in: %u", mm->adj4_index); + return (format (s, "un-set")); + } +} + + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +map_last_lock_gone (fib_node_t * node) +{ + /* + * The MAP is a root of the graph. 
As such + * it never has children and thus is never locked. + */ + ASSERT (0); +} + +static map_main_pre_resolved_t * +map_from_fib_node (fib_node_t * node) +{ + ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type); + return ((map_main_pre_resolved_t *) + (((char *) node) - + STRUCT_OFFSET_OF (map_main_pre_resolved_t, node))); +} + +static void +map_stack (map_main_pre_resolved_t * pr) +{ + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding (pr->fei); + + dpo_copy (&pr->dpo, dpo); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + map_stack (map_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +map_fib_node_get (fib_node_index_t index) +{ + return (&pre_resolved[index].node); +} + +/* + * Virtual function table registered by the MAP pre-resolved next-hops + * for participation in the FIB object graph. 
+ */ +const static fib_node_vft_t map_vft = { + .fnv_get = map_fib_node_get, + .fnv_last_lock = map_last_lock_gone, + .fnv_back_walk = map_back_walk, +}; + +static void +map_fib_resolve (map_main_pre_resolved_t * pr, + fib_protocol_t proto, u8 len, const ip46_address_t * addr) +{ + fib_prefix_t pfx = { + .fp_proto = proto, + .fp_len = len, + .fp_addr = *addr, + }; + + pr->fei = fib_table_entry_special_add (0, // default fib + &pfx, + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); + pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto); + map_stack (pr); +} + +static void +map_fib_unresolve (map_main_pre_resolved_t * pr, + fib_protocol_t proto, u8 len, const ip46_address_t * addr) +{ + fib_prefix_t pfx = { + .fp_proto = proto, + .fp_len = len, + .fp_addr = *addr, + }; + + fib_entry_child_remove (pr->fei, pr->sibling); + + fib_table_entry_special_remove (0, // default fib + &pfx, FIB_SOURCE_RR); + dpo_reset (&pr->dpo); + + pr->fei = FIB_NODE_INDEX_INVALID; + pr->sibling = FIB_NODE_INDEX_INVALID; +} + +static void +map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del) +{ + if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)) + { + ip46_address_t addr = { + .ip6 = *ip6, + }; + if (is_del) + map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP6], + FIB_PROTOCOL_IP6, 128, &addr); + else + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6], + FIB_PROTOCOL_IP6, 128, &addr); + } + if (ip4 && (ip4->as_u32 != 0)) + { + ip46_address_t addr = { + .ip4 = *ip4, + }; + if (is_del) + map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP4], + FIB_PROTOCOL_IP4, 32, &addr); + else + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4], + FIB_PROTOCOL_IP4, 32, &addr); } } #endif @@ -575,6 +592,8 @@ map_add_domain_command_fn (vlib_main_t * vm, num_m_args++; else if (unformat (line_input, "map-t")) flags |= MAP_DOMAIN_TRANSLATION; + else if (unformat (line_input, "rfc6052")) + flags |= (MAP_DOMAIN_TRANSLATION | MAP_DOMAIN_RFC6052); else { error = clib_error_return (0, 
"unknown input `%U'", @@ -695,10 +714,10 @@ map_pre_resolve_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t ip4nh; - ip6_address_t ip6nh; - map_main_t *mm = &map_main; + ip4_address_t ip4nh, *p_v4 = NULL; + ip6_address_t ip6nh, *p_v6 = NULL; clib_error_t *error = NULL; + int is_del = 0; memset (&ip4nh, 0, sizeof (ip4nh)); memset (&ip6nh, 0, sizeof (ip6nh)); @@ -710,10 +729,12 @@ map_pre_resolve_command_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) - mm->preresolve_ip4 = ip4nh; + p_v4 = &ip4nh; else if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) - mm->preresolve_ip6 = ip6nh; + p_v6 = &ip6nh; + else if (unformat (line_input, "del")) + is_del = 1; else { error = clib_error_return (0, "unknown input `%U'", @@ -722,7 +743,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, } } - map_pre_resolve (&ip4nh, &ip6nh); + map_pre_resolve (p_v4, p_v6, is_del); done: unformat_free (line_input); @@ -906,6 +927,18 @@ done: return error; } +static char * +map_flags_to_string (u32 flags) +{ + if (flags & MAP_DOMAIN_RFC6052) + return "rfc6052"; + if (flags & MAP_DOMAIN_PREFIX) + return "prefix"; + if (flags & MAP_DOMAIN_TRANSLATION) + return "map-t"; + return ""; +} + static u8 * format_map_domain (u8 * s, va_list * args) { @@ -920,13 +953,14 @@ format_map_domain (u8 * s, va_list * args) ip6_prefix = d->ip6_prefix; s = format (s, - "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s", + "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d " + "psid-offset %d psid-len %d mtu %d %s", d - mm->domains, format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len, format_ip6_address, &ip6_prefix, d->ip6_prefix_len, format_ip6_address, &d->ip6_src, d->ip6_src_len, d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu, - 
(d->flags & MAP_DOMAIN_TRANSLATION) ? "map-t" : ""); + map_flags_to_string (d->flags)); if (counters) { @@ -1093,7 +1127,10 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, map_domain_t *d; int domains = 0, rules = 0, domaincount = 0, rulecount = 0; if (pool_elts (mm->domains) == 0) - vlib_cli_output (vm, "No MAP domains are configured..."); + { + vlib_cli_output (vm, "No MAP domains are configured..."); + return 0; + } /* *INDENT-OFF* */ pool_foreach(d, mm->domains, ({ @@ -1113,9 +1150,10 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, #if MAP_SKIP_IP6_LOOKUP vlib_cli_output (vm, - "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n", - format_ip6_address, &mm->preresolve_ip6, mm->adj6_index, - format_ip4_address, &mm->preresolve_ip4, mm->adj4_index); + "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n", + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6], + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]); + #endif if (mm->tc_copy) @@ -1154,7 +1192,7 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, { which = cm - mm->domain_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; @@ -1455,10 +1493,12 @@ map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id, }; u32 h = 0; - h = crc_u32 (k.as_u32[0], h); - h = crc_u32 (k.as_u32[1], h); - h = crc_u32 (k.as_u32[2], h); - h = crc_u32 (k.as_u32[3], h); +#ifdef clib_crc32c_uses_intrinsics + h = clib_crc32c ((u8 *) k.as_u32, 16); +#else + u64 tmp = k.as_u32[0] ^ k.as_u32[1] ^ k.as_u32[2] ^ k.as_u32[3]; + h = clib_xxhash (tmp); +#endif h = h >> (32 - mm->ip4_reass_ht_log2len); f64 now = vlib_time_now (mm->vlib_main); @@ -1627,8 +1667,15 @@ map_ip6_reass_get (ip6_address_t * src, ip6_address_t * dst, u32 fragment_id, u32 h = 0; int i; - for (i = 0; i < 10; i++) - h = crc_u32 (k.as_u32[i], 
h); + +#ifdef clib_crc32c_uses_intrinsics + h = clib_crc32c ((u8 *) k.as_u32, 40); +#else + u64 tmp = + k.as_u64[0] ^ k.as_u64[1] ^ k.as_u64[2] ^ k.as_u64[3] ^ k.as_u64[4]; + h = clib_xxhash (tmp); +#endif + h = h >> (32 - mm->ip6_reass_ht_log2len); f64 now = vlib_time_now (mm->vlib_main); @@ -2180,10 +2227,12 @@ map_init (vlib_main_t * vm) mm->vlib_main = vm; #ifdef MAP_SKIP_IP6_LOOKUP - memset (&mm->preresolve_ip4, 0, sizeof (mm->preresolve_ip4)); - memset (&mm->preresolve_ip6, 0, sizeof (mm->preresolve_ip6)); - mm->adj4_index = 0; - mm->adj6_index = 0; + fib_protocol_t proto; + + FOR_EACH_FIB_PROTOCOL (proto) + { + map_pre_resolve_init (&pre_resolved[proto]); + } #endif /* traffic class */ @@ -2213,6 +2262,7 @@ map_init (vlib_main_t * vm) mm->ip4_reass_pool = 0; mm->ip4_reass_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + *mm->ip4_reass_lock = 0; mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; @@ -2228,6 +2278,7 @@ map_init (vlib_main_t * vm) mm->ip6_reass_pool = 0; mm->ip6_reass_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + *mm->ip6_reass_lock = 0; mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT; mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT; mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT; @@ -2238,6 +2289,9 @@ map_init (vlib_main_t * vm) mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; map_ip6_reass_reinit (NULL, NULL); +#ifdef MAP_SKIP_IP6_LOOKUP + fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft); +#endif map_dpo_module_init (); return 0;