X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fip%2Fip6_forward.c;h=bdcdbf0dc8288c9309e8535fa27277eae5fc1151;hb=8c1bebe83f7e277a36b8f570ed88363a5f3639b3;hp=afb23f1bf6c1f20171b180fc83268a4907f2cb41;hpb=203c632d29fc5f5d820251e4e58ea724e1002a91;p=vpp.git diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index afb23f1bf6c..bdcdbf0dc82 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -42,618 +42,19 @@ #include /* for ethernet_header_t */ #include /* for srp_hw_interface_class */ #include +#include +#include +#include +#include #include -static void compute_prefix_lengths_in_search_order (ip6_main_t * im) -{ - int i; - vec_reset_length (im->prefix_lengths_in_search_order); - /* Note: bitmap reversed so this is in fact a longest prefix match */ - clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap, - ({ - int dst_address_length = 128 - i; - vec_add1 (im->prefix_lengths_in_search_order, dst_address_length); - })); -} - -u32 -ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst) -{ - ip_lookup_main_t * lm = &im->lookup_main; - int i, len; - int rv; - BVT(clib_bihash_kv) kv, value; - u64 fib; - - len = vec_len (im->prefix_lengths_in_search_order); - - kv.key[0] = dst->as_u64[0]; - kv.key[1] = dst->as_u64[1]; - fib = ((u64)((fib_index))<<32); - - for (i = 0; i < len; i++) - { - int dst_address_length = im->prefix_lengths_in_search_order[i]; - ip6_address_t * mask = &im->fib_masks[dst_address_length]; - - ASSERT(dst_address_length >= 0 && dst_address_length <= 128); - //As lengths are decreasing, masks are increasingly specific. - kv.key[0] &= mask->as_u64[0]; - kv.key[1] &= mask->as_u64[1]; - kv.key[2] = fib | dst_address_length; - - rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value); - if (rv == 0) - return value.value; - } - - return lm->miss_adj_index; -} - -u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst) -{ - u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - return ip6_fib_lookup_with_table (im, fib_index, dst); -} - -void -vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_add_del_route_args_t a; - ip_adjacency_t * adj; - - memset(&a, 0x0, sizeof(ip6_add_del_route_args_t)); - - a.table_index_or_table_id = fib_index; - a.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - - /* Add ff02::1:ff00:0/104 via local route for all tables. - This is required for neighbor discovery to work. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_solicited_node_multicast_address (&a.dst_address, 0); - - a.dst_address_length = 104; - ip6_add_del_route (im, &a); - - /* Add all-routers multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-nodes multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_hosts); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-mldv2 multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_mldv2_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); -} - -static ip6_fib_t * -create_fib_with_table_id (ip6_main_t * im, u32 table_id) -{ - ip6_fib_t * fib; - hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); - vec_add2 (im->fibs, fib, 1); - fib->table_id = table_id; - fib->index = fib - im->fibs; - fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - vnet_ip6_fib_init (im, fib->index); - return fib; -} - -ip6_fib_t * -find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags) -{ - uword * p, fib_index; - - fib_index = table_index_or_id; - if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX)) - { - if (table_index_or_id == ~0) { - table_index_or_id = 0; - while (hash_get (im->fib_index_by_table_id, table_index_or_id)) { - table_index_or_id++; - } - return create_fib_with_table_id (im, table_index_or_id); - } - - p = hash_get (im->fib_index_by_table_id, table_index_or_id); - if (! p) - return create_fib_with_table_id (im, table_index_or_id); - fib_index = p[0]; - } - return vec_elt_at_index (im->fibs, fib_index); -} - -void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_fib_t * fib; - ip6_address_t dst_address; - u32 dst_address_length, adj_index; - uword is_del; - u32 old_adj_index = ~0; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; - - /* Either create new adjacency or use given one depending on arguments. */ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - } - else - adj_index = a->adj_index; - - dst_address = a->dst_address; - dst_address_length = a->dst_address_length; - fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, - a->flags); - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]); - - /* refcount accounting */ - if (is_del) - { - ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0); - if (--im->dst_address_length_refcounts[dst_address_length] == 0) - { - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 0); - compute_prefix_lengths_in_search_order (im); - } - } - else - { - im->dst_address_length_refcounts[dst_address_length]++; - - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 1); - compute_prefix_lengths_in_search_order (im); - } - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - old_adj_index = value.value; - - if (is_del) - BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */); - else - { - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - kv.value = adj_index; - - BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */); - } - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 - && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count --; - } - - /* Delete old adjacency index if present and changed. */ - { - if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); - } -} - -void -ip6_add_del_route_next_hop (ip6_main_t * im, - u32 flags, - ip6_address_t * dst_address, - u32 dst_address_length, - ip6_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index) -{ - vnet_main_t * vnm = vnet_get_main(); - ip_lookup_main_t * lm = &im->lookup_main; - u32 fib_index; - ip6_fib_t * fib; - ip6_address_t masked_dst_address; - u32 old_mp_adj_index, new_mp_adj_index; - u32 dst_adj_index, nh_adj_index; - int rv; - ip_adjacency_t * dst_adj; - ip_multipath_adjacency_t * old_mp, * new_mp; - int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0; - int is_interface_next_hop; - clib_error_t * error = 0; - uword * nh_result; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - if (explicit_fib_index == (u32)~0) - fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); - else - fib_index = explicit_fib_index; - - fib = vec_elt_at_index (im->fibs, fib_index); - - /* Lookup next hop to be added or deleted. */ - is_interface_next_hop = ip6_address_is_zero (next_hop); - if (adj_index == (u32)~0) - { - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip6_adjacency_set_interface_route (vnm, adj, - next_hop_sw_if_index, ~0); - ip_call_add_del_adjacency_callbacks - (lm, nh_adj_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index, nh_adj_index); - } - } - else - { - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - ip_adjacency_t * adj; - nh_adj_index = ip6_fib_lookup_with_table (im, fib_index, next_hop); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ND interface adjacencty is present, we need to - install ND adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip6.as_u64[0] == 0 && - adj->arp.next_hop.ip6.as_u64[1] == 0) - { - nh_adj_index = vnet_ip6_neighbor_glean_add(fib_index, next_hop); - } - else - { - ip_adjacency_t add_adj; - memset (&add_adj, 0, sizeof(add_adj)); - add_adj.n_adj = 1; - add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; - add_adj.indirect.next_hop.ip6.as_u64[0] = next_hop->as_u64[0]; - add_adj.indirect.next_hop.ip6.as_u64[1] = next_hop->as_u64[1]; - add_adj.explicit_fib_index = explicit_fib_index; - ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); - } - } - else - nh_adj_index = value.value; - - } - } - else - { - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "next-hop %U/128 not in FIB", - format_ip6_address, next_hop); - goto done; - } - - nh_adj_index = value.value; - } - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - masked_dst_address = dst_address[0]; - ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]); - - kv.key[0] = masked_dst_address.as_u64[0]; - kv.key[1] = masked_dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value); - - if (rv == 0) - { - dst_adj_index = value.value; - dst_adj = ip_get_adjacency (lm, dst_adj_index); - } - else - { - /* For deletes destination must be known. */ - if (is_del) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "unknown destination %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - dst_adj_index = ~0; - dst_adj = 0; - } - - /* Ignore adds of X/128 with next hop of X. */ - if (! is_del - && dst_address_length == 128 - && ip6_address_is_equal (dst_address, next_hop)) - { - vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; - error = clib_error_return (0, "prefix matches next hop %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - /* Destination is not known and default weight is set so add route - to existing non-multipath adjacency */ - if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) - { - /* create new adjacency */ - ip6_add_del_route_args_t a; - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP6_ROUTE_FLAG_NO_REDISTRIBUTE - | IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = nh_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - goto done; - } - - old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; - - if (! ip_multipath_adjacency_add_del_next_hop - (lm, is_del, - dst_adj ? dst_adj->heap_handle : ~0, - nh_adj_index, - next_hop_weight, - &new_mp_adj_index)) - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; - error = clib_error_return - (0, "requested deleting next-hop %U not found in multi-path", - format_ip6_address, next_hop); - goto done; - } - - old_mp = new_mp = 0; - if (old_mp_adj_index != ~0) - old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - if (new_mp_adj_index != ~0) - new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); - - if (old_mp != new_mp) - { - ip6_add_del_route_args_t a; - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE)); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - } - - done: - if (error) - clib_error_report (error); -} - -u32 -ip6_get_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - ip6_address_t masked_address; - BVT(clib_bihash_kv) kv, value; - - ASSERT (address_length < ARRAY_LEN (im->fib_masks)); - clib_memcpy (&masked_address, address, sizeof (masked_address)); - ip6_address_mask (&masked_address, &im->fib_masks[address_length]); - - kv.key[0] = masked_address.as_u64[0]; - kv.key[1] = masked_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - return (value.value); - return 0; -} - -void -ip6_foreach_matching_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * dst_address, - u32 address_length, - ip6_address_t ** results, - u8 ** result_lengths) -{ - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - BVT(clib_bihash) * h = &im->ip6_lookup_table; - BVT(clib_bihash_value) * v; - clib_bihash_bucket_t * b; - int i, j, k; - - if (*results) - _vec_len (*results) = 0; - if (*result_lengths) - _vec_len (*result_lengths) = 0; - - /* Walk the table looking for routes which match the supplied address */ - for (i = 0; i < h->nbuckets; i++) - { - b = &h->buckets [i]; - if (b->offset == 0) - continue; - - v = BV(clib_bihash_get_value) (h, b->offset); - for (j = 0; j < (1<log2_pages); j++) - { - for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) - { - if (BV(clib_bihash_is_free)(&v->kvp[k])) - continue; - - if ((v->kvp[k].key[2] - == (((u64)((fib - im->fibs))<<32) | address_length)) - && ip6_destination_matches_route - (im, dst_address, (ip6_address_t *) &v->kvp[k], - address_length)) - { - ip6_address_t * a; - - a = (ip6_address_t *)(&v->kvp[k]); - - vec_add1 (*results, a[0]); - vec_add1 (*result_lengths, address_length); - } - } - v++; - } - } -} - -void ip6_maybe_remap_adjacencies (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags) -{ -#if SOONE - ip6_fib_t * fib - = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); -#endif - ip_lookup_main_t * lm = &im->lookup_main; - - if (lm->n_adjacency_remaps == 0) - return; - - clib_warning ("unimplemented, please report to vpp-dev@cisco.com"); - - /* All remaps have been performed. */ - lm->n_adjacency_remaps = 0; -} - -void ip6_delete_matching_routes (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - /* $$$$ static may be OK - this should happen only on thread 0 */ - static ip6_address_t * matching_addresses; - static u8 * matching_address_lengths; - u32 l, i; - ip6_add_del_route_args_t a; - - vlib_smp_unsafe_warning(); - - a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags; - a.table_index_or_table_id = table_index_or_table_id; - a.adj_index = ~0; - a.add_adj = 0; - a.n_add_adj = 0; - - for (l = address_length + 1; l <= 128; l++) - { - ip6_foreach_matching_route (im, table_index_or_table_id, flags, - address, - l, - &matching_addresses, - &matching_address_lengths); - for (i = 0; i < vec_len (matching_addresses); i++) - { - a.dst_address = matching_addresses[i]; - a.dst_address_length = matching_address_lengths[i]; - ip6_add_del_route (im, &a); - } - } - - ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags); -} +/** + * @file + * @brief IPv6 Forwarding. + * + * This file contains the source code for IPv6 forwarding. + */ void ip6_forward_next_trace (vlib_main_t * vm, @@ -664,12 +65,10 @@ ip6_forward_next_trace (vlib_main_t * vm, always_inline uword ip6_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_indirect) + vlib_frame_t * frame) { ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -686,13 +85,14 @@ ip6_lookup_inline (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { vlib_buffer_t * p0, * p1; - u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + u32 pi0, pi1, lbi0, lbi1, wrong_next; ip_lookup_next_t next0, next1; ip6_header_t * ip0, * ip1; - ip_adjacency_t * adj0, * adj1; ip6_address_t * dst_addr0, * dst_addr1; u32 fib_index0, fib_index1; u32 flow_hash_config0, flow_hash_config1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; /* Prefetch next iteration. */ { @@ -716,19 +116,8 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - if (is_indirect) - { - ip_adjacency_t * iadj0, * iadj1; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - dst_addr1 = &iadj1->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - } + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); @@ -738,69 +127,60 @@ ip6_lookup_inline (vlib_main_t * vm, fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); - adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, dst_addr1); + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); + lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1); - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } - if (PREDICT_FALSE (adj1->explicit_fib_index != ~0)) - { - adj_index1 = ip6_fib_lookup_with_table - (im, adj1->explicit_fib_index, dst_addr1); - adj1 = ip_get_adjacency (lm, adj_index1); - } - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; - - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; - next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index1 ? IP6_LOOKUP_NEXT_HOP_BY_HOP : adj1->lookup_next_index; + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); - vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - vnet_buffer (p0)->ip.flow_hash = + flow_hash_config0 = lb0->lb_hash_config; + vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - - if (PREDICT_FALSE(adj1->n_adj > 1)) + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) { - flow_hash_config1 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - - vnet_buffer (p1)->ip.flow_hash = + flow_hash_config1 = lb1->lb_hash_config; + vnet_buffer (p1)->ip.flow_hash = ip6_compute_flow_hash (ip1, flow_hash_config1); } - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); - adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + dpo1 = load_balance_get_bucket_i(lb1, + (vnet_buffer (p1)->ip.flow_hash & + lb1->lb_n_buckets_minus_1)); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; + next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next1; + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + vlib_increment_combined_counter + (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; @@ -844,16 +224,17 @@ ip6_lookup_inline (vlib_main_t * vm, } } } - + while (n_left_from > 0 && n_left_to_next > 0) { vlib_buffer_t * p0; ip6_header_t * ip0; - u32 pi0, adj_index0; + u32 pi0, lbi0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; + load_balance_t * lb0; ip6_address_t * dst_addr0; u32 fib_index0, flow_hash_config0; + const dpo_id_t *dpo0; pi0 = from[0]; to_next[0] = pi0; @@ -862,57 +243,44 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - if (is_indirect) - { - ip_adjacency_t * iadj0; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - } + dst_addr0 = &ip0->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); - - adj0 = ip_get_adjacency (lm, adj_index0); + flow_hash_config0 = + ip6_fib_get (fib_index0)->flow_hash_config; - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; + lb0 = load_balance_get (lbi0); vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - vnet_buffer (p0)->ip.flow_hash = + flow_hash_config0 = lb0->lb_hash_config; + vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + next0 = dpo0->dpoi_next_node; + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; @@ -937,163 +305,192 @@ ip6_lookup_inline (vlib_main_t * vm, } if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace(vm, node, frame, VLIB_TX); + ip6_forward_next_trace(vm, node, frame, VLIB_TX); return frame->n_vectors; } -void ip6_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index) -{ - vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ip_lookup_next_t n; - u32 node_index; - - if (hw->hw_class_index == ethernet_hw_interface_class.index - || hw->hw_class_index == srp_hw_interface_class.index) - { - n = IP_LOOKUP_NEXT_ARP; - node_index = ip6_discover_neighbor_node.index; - adj->if_address_index = if_address_index; - adj->arp.next_hop.ip6.as_u64[0] = 0; - adj->arp.next_hop.ip6.as_u64[1] = 0; - } - else - { - n = IP_LOOKUP_NEXT_REWRITE; - node_index = ip6_rewrite_node.index; - } - - adj->lookup_next_index = n; - adj->explicit_fib_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - node_index, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); -} - static void ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, ip6_main_t * im, u32 fib_index, ip_interface_address_t * a) { ip_lookup_main_t * lm = &im->lookup_main; - ip_adjacency_t * adj; ip6_address_t * address = ip_interface_address_get_address (lm, a); - ip6_add_del_route_args_t x; - vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); - u32 classify_table_index; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = a->address_length; - x.n_add_adj = 0; - x.add_adj = 0; + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; a->neighbor_probe_adj_index = ~0; if (a->address_length < 128) - { - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - ip6_add_del_route (im, &x); - a->neighbor_probe_adj_index = x.adj_index; - } - - /* Add e.g. ::1/128 as local to this host. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); + { + fib_node_index_t fei; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, /* No next-hop address */ + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + a->neighbor_probe_adj_index = fib_entry_get_adj(fei); + } - classify_table_index = ~0; + pfx.fp_len = 128; if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) - classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; - if (classify_table_index != (u32) ~0) + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP6, + classify_dpo_create(DPO_PROTO_IP6, classify_table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + } + + fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); +} + +static void +ip6_del_interface_routes (ip6_main_t * im, + u32 fib_index, + ip6_address_t * address, + u32 address_length) +{ + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + if (pfx.fp_len < 128) { - adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj->classify.table_index = classify_table_index; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); + } - else - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - - adj->if_address_index = a - lm->if_address_pool; - adj->rewrite_header.sw_if_index = sw_if_index; - adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; - adj->rewrite_header.data_bytes = 0; - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + + pfx.fp_len = 128; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); } -static void -ip6_del_interface_routes (ip6_main_t * im, u32 fib_index, - ip6_address_t * address, u32 address_length) +void +ip6_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable) { - ip6_add_del_route_args_t x; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_DEL - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = address_length; - x.adj_index = ~0; - x.n_add_adj = 0; - x.add_adj = 0; - - if (address_length < 128) + vlib_main_t * vm = vlib_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + u32 lookup_feature_index; + + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) { - /* Don't wipe out fe80::0/64 */ - if (address_length != 64 || - address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL)) - ip6_add_del_route (im, &x); + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; } + else + { + ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + + for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++) + { + ip_config_main_t * cm = &lm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + if (cast == VNET_IP_RX_UNICAST_FEAT) + lookup_feature_index = im->ip6_unicast_rx_feature_lookup; + else + lookup_feature_index = im->ip6_multicast_rx_feature_lookup; - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); - ip6_delete_matching_routes (im, - fib_index, - IP6_ROUTE_FLAG_FIB_INDEX, - address, - address_length); + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } } -typedef struct { - u32 sw_if_index; - ip6_address_t address; - u32 length; -} ip6_interface_address_t; +/* get first interface address */ +ip6_address_t * +ip6_interface_first_address (ip6_main_t * im, + u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip6_address_t * result = 0; -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * new_address, - u32 new_length, - u32 redistribute, - u32 insert_routes, - u32 is_del); + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + if (result_ia) + *result_ia = result ? ia : 0; + return result; +} -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * address, - u32 address_length, - u32 redistribute, - u32 insert_routes, - u32 is_del) +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 is_del) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -1125,17 +522,13 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, goto done; } - if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) - { - if (is_del) - ip6_del_interface_routes (im, ip6_af.fib_index, address, - address_length); - - else - ip6_add_interface_routes (vnm, sw_if_index, - im, ip6_af.fib_index, - pool_elt_at_index (lm->if_address_pool, if_address_index)); - } + if (is_del) + ip6_del_interface_routes (im, ip6_af.fib_index, address, + address_length); + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, if_address_index)); { ip6_add_del_interface_address_callback_t * cb; @@ -1151,18 +544,6 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, return error; } -clib_error_t * -ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, - ip6_address_t * address, u32 address_length, - u32 is_del) -{ - return ip6_add_del_interface_address_internal - (vm, sw_if_index, address, address_length, - /* redistribute */ 1, - /* insert_routes */ 1, - is_del); -} - clib_error_t * ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, @@ -1182,7 +563,7 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, + foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ a = ip_interface_address_get_address (&im->lookup_main, ia); @@ -1201,69 +582,121 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down); /* Built-in ip6 unicast rx feature path definition */ +VNET_IP6_UNICAST_FEATURE_INIT (ip6_flow_classify, static) = { + .node_name = "ip6-flow-classify", + .runs_before = ORDER_CONSTRAINTS {"ip6-inacl", 0}, + .feature_index = &ip6_main.ip6_unicast_rx_feature_flow_classify, +}; + VNET_IP6_UNICAST_FEATURE_INIT (ip6_inacl, static) = { - .node_name = "ip6-inacl", - .runs_before = {"ipsec-input-ip6", 0}, + .node_name = "ip6-inacl", + .runs_before = ORDER_CONSTRAINTS {"ip6-policer-classify", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_check_access, }; +VNET_IP6_UNICAST_FEATURE_INIT (ip6_policer_classify, static) = { + .node_name = "ip6-policer-classify", + .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip6", 0}, + .feature_index = &ip6_main.ip6_unicast_rx_feature_policer_classify, +}; + VNET_IP6_UNICAST_FEATURE_INIT (ip6_ipsec, static) = { .node_name = "ipsec-input-ip6", - .runs_before = {"l2tp-decap", 0}, + .runs_before = ORDER_CONSTRAINTS {"l2tp-decap", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_ipsec, }; VNET_IP6_UNICAST_FEATURE_INIT (ip6_l2tp, static) = { .node_name = "l2tp-decap", - .runs_before = {"vpath-input-ip6", 0}, + .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip6", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_l2tp_decap, }; VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = { .node_name = "vpath-input-ip6", - .runs_before = {"ip6-lookup", 0}, + .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_vpath, }; VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = { .node_name = "ip6-lookup", - .runs_before = {0}, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_lookup, }; +VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /*last feature*/ + .feature_index = &ip6_main.ip6_unicast_rx_feature_drop, +}; + /* Built-in ip6 multicast rx feature path definition (none now) */ -VNET_IP6_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { +VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = { .node_name = "vpath-input-ip6", - .runs_before = {"ip6-lookup", 0}, + .runs_before = ORDER_CONSTRAINTS {"ip6-lookup", 0}, .feature_index = &ip6_main.ip6_multicast_rx_feature_vpath, }; VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = { .node_name = "ip6-lookup", - .runs_before = {0}, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_multicast_rx_feature_lookup, }; -static char * feature_start_nodes[] = +VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /* last feature */ + .feature_index = &ip6_main.ip6_multicast_rx_feature_drop, +}; + +static char * rx_feature_start_nodes[] = {"ip6-input"}; +static char * tx_feature_start_nodes[] = +{ + "ip6-rewrite", + "ip6-midchain", +}; + +/* Built-in ip4 tx feature path definition */ +VNET_IP6_TX_FEATURE_INIT (interface_output, static) = { + .node_name = "interface-output", + .runs_before = 0, /* not before any other features */ + .feature_index = &ip6_main.ip6_tx_feature_interface_output, +}; + static clib_error_t * ip6_feature_init (vlib_main_t * vm, ip6_main_t * im) { ip_lookup_main_t * lm = &im->lookup_main; clib_error_t * error; vnet_cast_t cast; - - for (cast = 0; cast < VNET_N_CAST; cast++) + ip_config_main_t * cm; + vnet_config_main_t * vcm; + char **feature_start_nodes; + int feature_start_len; + + for (cast = 0; cast < VNET_N_IP_FEAT; cast++) { - ip_config_main_t * cm = &lm->rx_config_mains[cast]; - vnet_config_main_t * vcm = &cm->config_main; - - if ((error = ip_feature_init_cast (vm, cm, vcm, + cm = &lm->feature_config_mains[cast]; + vcm = &cm->config_main; + + if (cast < VNET_IP_TX_FEAT) + { + feature_start_nodes = rx_feature_start_nodes; + feature_start_len = ARRAY_LEN(rx_feature_start_nodes); + } + else + { + feature_start_nodes = tx_feature_start_nodes; + feature_start_len = ARRAY_LEN(tx_feature_start_nodes); + } + + if ((error = vnet_feature_arc_init (vm, vcm, feature_start_nodes, - ARRAY_LEN(feature_start_nodes), - cast, - 0 /* is_ip4 */))) + feature_start_len, + im->next_feature[cast], + &im->feature_nodes[cast]))) return error; } return 0; @@ -1280,18 +713,20 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 ci, cast; u32 feature_index; - for (cast = 0; cast < VNET_N_CAST; cast++) + for (cast = 0; cast < VNET_N_IP_FEAT; cast++) { - ip_config_main_t * cm = &lm->rx_config_mains[cast]; + ip_config_main_t * cm = &lm->feature_config_mains[cast]; vnet_config_main_t * vcm = &cm->config_main; vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); ci = cm->config_index_by_sw_if_index[sw_if_index]; - if (cast == VNET_UNICAST) - feature_index = im->ip6_unicast_rx_feature_lookup; + if (cast == VNET_IP_RX_UNICAST_FEAT) + feature_index = im->ip6_unicast_rx_feature_drop; + else if (cast == VNET_IP_RX_MULTICAST_FEAT) + feature_index = im->ip6_multicast_rx_feature_drop; else - feature_index = im->ip6_multicast_rx_feature_lookup; + feature_index = im->ip6_tx_feature_interface_output; if (is_add) ci = vnet_config_add_feature (vm, vcm, @@ -1300,13 +735,18 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, /* config data */ 0, /* # bytes of config data */ 0); else - ci = vnet_config_del_feature (vm, vcm, - ci, - feature_index, - /* config data */ 0, - /* # bytes of config data */ 0); - + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index) + im->ip_enabled_by_sw_if_index[sw_if_index] = 0; + } cm->config_index_by_sw_if_index[sw_if_index] = ci; + /* + * note: do not update the tx feature count here. + */ } return /* no error */ 0; } @@ -1318,7 +758,7 @@ ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 0); + return ip6_lookup_inline (vm, node, frame); } static u8 * format_ip6_lookup_trace (u8 * s, va_list * args); @@ -1336,25 +776,95 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup) -static uword -ip6_indirect (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +ip6_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 1); -} + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace(vm, node, frame, VLIB_TX); -VLIB_REGISTER_NODE (ip6_indirect_node) = { - .function = ip6_indirect, - .name = "ip6-indirect", + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const ip6_header_t *ip0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets - 1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_load_balance_node) = { + .function = ip6_load_balance, + .name = "ip6-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip6-lookup", .format_trace = format_ip6_lookup_trace, .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_indirect_node, ip6_indirect) +VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance) typedef struct { /* Adjacency taken. */ @@ -1375,7 +885,7 @@ static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args) s = format(s, "%U%U", format_white_space, indent, - format_ip6_header, t->packet_data); + format_ip6_header, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1384,16 +894,13 @@ static u8 * format_ip6_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); - s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", - t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x", + t->fib_index, t->adj_index, t->flow_hash); s = format(s, "\n%U%U", format_white_space, indent, - format_ip6_header, t->packet_data); + format_ip6_header, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1404,16 +911,16 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, + vnm, t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1543,12 +1050,6 @@ ip6_punt (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); } -static uword -ip6_miss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); } - VLIB_REGISTER_NODE (ip6_drop_node,static) = { .function = ip6_drop, .name = "ip6-drop", @@ -1579,21 +1080,6 @@ VLIB_REGISTER_NODE (ip6_punt_node,static) = { VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt) -VLIB_REGISTER_NODE (ip6_miss_node,static) = { - .function = ip6_miss, - .name = "ip6-miss", - .vector_size = sizeof (u32), - - .format_trace = format_ip6_forward_next_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_miss_node, ip6_miss) - VLIB_REGISTER_NODE (ip6_multicast_node,static) = { .function = ip6_drop, .name = "ip6-multicast", @@ -1623,7 +1109,7 @@ u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6 sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); data_this_buffer = (void *) (ip0 + 1); - + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) { sum0 = ip_csum_with_carry (sum0, @@ -1643,7 +1129,7 @@ u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6 skip_bytes = 8* (1 + ext_hdr->n_data_u64s); data_this_buffer = (void *)((u8 *)data_this_buffer + skip_bytes); - + payload_length_host_byte_order -= skip_bytes; headers_size += skip_bytes; } @@ -1675,7 +1161,7 @@ u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6 *bogus_lengthp = 1; return 0xfefe; } - } + } else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); #else if (p0 && n_this_buffer + headers_size > p0->current_length) @@ -1746,7 +1232,7 @@ ip6_local (vlib_main_t * vm, from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; - + if (node->flags & VLIB_NODE_FLAG_TRACE) ip6_forward_next_trace (vm, node, frame, VLIB_TX); @@ -1765,14 +1251,14 @@ ip6_local (vlib_main_t * vm, i32 len_diff0, len_diff1; u8 error0, type0, good_l4_checksum0; u8 error1, type1, good_l4_checksum1; - + pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; from += 2; n_left_from -= 2; to_next += 2; n_left_to_next -= 2; - + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -1846,17 +1332,21 @@ ip6_local (vlib_main_t * vm, /* Drop packets from unroutable hosts. */ /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } - if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP) + if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && + type1 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip1->src_address)) { u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1); - error1 = (lm->miss_adj_index == src_adj_index1 + error1 = (ADJ_INDEX_INVALID == src_adj_index1 ? IP6_ERROR_SRC_LOOKUP_MISS : error1); } @@ -1880,13 +1370,13 @@ ip6_local (vlib_main_t * vm, u32 pi0, ip_len0, udp_len0, flags0, next0; i32 len_diff0; u8 error0, type0, good_l4_checksum0; - + pi0 = to_next[0] = from[0]; from += 1; n_left_from -= 1; to_next += 1; n_left_to_next -= 1; - + p0 = vlib_get_buffer (vm, pi0); ip0 = vlib_buffer_get_current (p0); @@ -1933,10 +1423,12 @@ ip6_local (vlib_main_t * vm, : error0); /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } @@ -1949,7 +1441,7 @@ ip6_local (vlib_main_t * vm, to_next, n_left_to_next, pi0, next0); } - + vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -1997,9 +1489,10 @@ typedef enum { } ip6_discover_neighbor_error_t; static uword -ip6_discover_neighbor (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip6_discover_neighbor_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_glean) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -2008,7 +1501,7 @@ ip6_discover_neighbor (vlib_main_t * vm, uword n_left_from, n_left_to_next_drop; static f64 time_last_seed_change = -1e100; static u32 hash_seeds[3]; - static uword hash_bitmap[256 / BITS (uword)]; + static uword hash_bitmap[256 / BITS (uword)]; f64 time_now; int bogus_length; @@ -2059,11 +1552,11 @@ ip6_discover_neighbor (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); - if (adj0->arp.next_hop.ip6.as_u64[0] || - adj0->arp.next_hop.ip6.as_u64[1]) { - ip0->dst_address.as_u64[0] = adj0->arp.next_hop.ip6.as_u64[0]; - ip0->dst_address.as_u64[1] = adj0->arp.next_hop.ip6.as_u64[1]; - } + if (!is_glean) + { + ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0]; + ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1]; + } a0 = hash_seeds[0]; b0 = hash_seeds[1]; @@ -2105,37 +1598,46 @@ ip6_discover_neighbor (vlib_main_t * vm, if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) drop0 = 1; - p0->error = - node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP + p0->error = + node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]; if (drop0) continue; + /* + * the adj has been updated to a rewrite but the node the DPO that got + * us here hasn't - yet. no big deal. we'll drop while we wait. + */ + if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) + continue; + { u32 bi0 = 0; icmp6_neighbor_solicitation_header_t * h0; vlib_buffer_t * b0; - h0 = vlib_packet_template_get_packet + h0 = vlib_packet_template_get_packet (vm, &im->discover_neighbor_packet_template, &bi0); - /* + /* * Build ethernet header. - * Choose source address based on destination lookup - * adjacency. + * Choose source address based on destination lookup + * adjacency. */ - if (ip6_src_address_for_packet (im, p0, &h0->ip.src_address, - sw_if_index0)) { - //There is no address on the interface + if (ip6_src_address_for_packet (lm, + sw_if_index0, + &h0->ip.src_address)) + { + /* There is no address on the interface */ p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; vlib_buffer_free(vm, &bi0, 1); continue; - } + } - /* - * Destination address is a solicited node multicast address. + /* + * Destination address is a solicited node multicast address. * We need to fill in - * the low 24 bits with low 24 bits of target's address. + * the low 24 bits with low 24 bits of target's address. */ h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13]; h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14]; @@ -2143,24 +1645,24 @@ ip6_discover_neighbor (vlib_main_t * vm, h0->neighbor.target_address = ip0->dst_address; - clib_memcpy (h0->link_layer_option.ethernet_address, + clib_memcpy (h0->link_layer_option.ethernet_address, hw_if0->hw_address, vec_len (hw_if0->hw_address)); /* $$$$ appears we need this; why is the checksum non-zero? */ h0->neighbor.icmp.checksum = 0; - h0->neighbor.icmp.checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, + h0->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, &bogus_length); ASSERT (bogus_length == 0); vlib_buffer_copy_trace_flag (vm, p0, bi0); b0 = vlib_get_buffer (vm, bi0); - vnet_buffer (b0)->sw_if_index[VLIB_TX] + vnet_buffer (b0)->sw_if_index[VLIB_TX] = vnet_buffer (p0)->sw_if_index[VLIB_TX]; /* Add rewrite/encap string. */ - vnet_rewrite_one_header (adj0[0], h0, + vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); @@ -2170,16 +1672,32 @@ ip6_discover_neighbor (vlib_main_t * vm, } } - vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, n_left_to_next_drop); } return frame->n_vectors; } +static uword +ip6_discover_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 0)); +} + +static uword +ip6_glean (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 1)); +} + static char * ip6_discover_neighbor_error_strings[] = { [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", - [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] + [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent", [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS] = "no source address for ND solicitation", @@ -2202,6 +1720,23 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = { }, }; +VLIB_REGISTER_NODE (ip6_glean_node) = { + .function = ip6_glean, + .name = "ip6-glean", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + }, +}; + clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) { @@ -2222,8 +1757,8 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) { return clib_error_return (0, "%U: interface %U down", - format_ip6_address, dst, - format_vnet_sw_if_index_name, vnm, + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, sw_if_index); } @@ -2231,7 +1766,7 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) if (! src) { vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; - return clib_error_return + return clib_error_return (0, "no matching interface address for destination %U (interface %U)", format_ip6_address, dst, format_vnet_sw_if_index_name, vnm, sw_if_index); @@ -2252,7 +1787,7 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, vec_len (hi->hw_address)); - h->neighbor.icmp.checksum = + h->neighbor.icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); ASSERT(bogus_length == 0); @@ -2277,24 +1812,27 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) typedef enum { IP6_REWRITE_NEXT_DROP, + IP6_REWRITE_NEXT_ICMP_ERROR, } ip6_rewrite_next_t; always_inline uword ip6_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int rewrite_for_locally_received_packets) + int rewrite_for_locally_received_packets, + int is_midchain) { ip_lookup_main_t * lm = &ip6_main.lookup_main; u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index); vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX; + ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT]; n_left_from = frame->n_vectors; next_index = node->cached_next_index; u32 cpu_index = os_get_cpu_number(); - + while (n_left_from > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); @@ -2306,7 +1844,8 @@ ip6_rewrite_inline (vlib_main_t * vm, ip6_header_t * ip0, * ip1; u32 pi0, rw_len0, next0, error0, adj_index0; u32 pi1, rw_len1, next1, error1, adj_index1; - + u32 tx_sw_if_index0, tx_sw_if_index1; + /* Prefetch next iteration. */ { vlib_buffer_t * p2, * p3; @@ -2331,7 +1870,7 @@ ip6_rewrite_inline (vlib_main_t * vm, n_left_from -= 2; to_next += 2; n_left_to_next -= 2; - + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -2345,6 +1884,7 @@ ip6_rewrite_inline (vlib_main_t * vm, ip1 = vlib_buffer_get_current (p1); error0 = error1 = IP6_ERROR_NONE; + next0 = next1 = IP6_REWRITE_NEXT_DROP; if (! rewrite_for_locally_received_packets) { @@ -2360,37 +1900,43 @@ ip6_rewrite_inline (vlib_main_t * vm, ip0->hop_limit = hop_limit0; ip1->hop_limit = hop_limit1; - error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0; - error1 = hop_limit1 <= 0 ? IP6_ERROR_TIME_EXPIRED : error1; + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. + */ + if (PREDICT_FALSE(hop_limit0 <= 0)) + { + error0 = IP6_ERROR_TIME_EXPIRED; + next0 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0; + icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + } + if (PREDICT_FALSE(hop_limit1 <= 0)) + { + error1 = IP6_ERROR_TIME_EXPIRED; + next1 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0; + icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + } } adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); - if (rewrite_for_locally_received_packets) - { - /* - * If someone sends e.g. an icmp6 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; + vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; + vnet_buffer(p1)->ip.save_rewrite_length = rw_len1; - vlib_increment_combined_counter (&lm->adjacency_counters, - cpu_index, + vlib_increment_combined_counter (&adjacency_counters, + cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); - vlib_increment_combined_counter (&lm->adjacency_counters, - cpu_index, + vlib_increment_combined_counter (&adjacency_counters, + cpu_index, adj_index1, /* packet increment */ 0, /* byte increment */ rw_len1); @@ -2403,25 +1949,66 @@ ip6_rewrite_inline (vlib_main_t * vm, ? IP6_ERROR_MTU_EXCEEDED : error1); - p0->current_data -= rw_len0; - p1->current_data -= rw_len1; + /* Don't adjust the buffer for hop count issue; icmp-error node + * wants to see the IP headerr */ + if (PREDICT_TRUE(error0 == IP6_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + tx_sw_if_index0; + next0 = adj0[0].rewrite_header.next_index; + + if (PREDICT_FALSE + (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, + tx_sw_if_index0))) + { + p0->current_config_index = + vec_elt (cm->config_index_by_sw_if_index, + tx_sw_if_index0); + vnet_get_config_data (&cm->config_main, + &p0->current_config_index, + &next0, + /* # bytes of config data */ 0); + } + } + if (PREDICT_TRUE(error1 == IP6_ERROR_NONE)) + { + p1->current_data -= rw_len1; + p1->current_length += rw_len1; - p0->current_length += rw_len0; - p1->current_length += rw_len1; + tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = + tx_sw_if_index1; + next1 = adj1[0].rewrite_header.next_index; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; - vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index; - - next0 = (error0 == IP6_ERROR_NONE) ? - adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; - next1 = (error1 == IP6_ERROR_NONE) ? - adj1[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; + if (PREDICT_FALSE + (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, + tx_sw_if_index1))) + { + p1->current_config_index = + vec_elt (cm->config_index_by_sw_if_index, + tx_sw_if_index1); + vnet_get_config_data (&cm->config_main, + &p1->current_config_index, + &next1, + /* # bytes of config data */ 0); + } + } /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ethernet_header_t)); - + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + adj1->sub_type.midchain.fixup_func(vm, adj1, p1); + } + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); @@ -2434,7 +2021,8 @@ ip6_rewrite_inline (vlib_main_t * vm, ip6_header_t * ip0; u32 pi0, rw_len0; u32 adj_index0, next0, error0; - + u32 tx_sw_if_index0; + pi0 = to_next[0] = from[0]; p0 = vlib_get_buffer (vm, pi0); @@ -2445,10 +2033,11 @@ ip6_rewrite_inline (vlib_main_t * vm, ASSERT(adj_index0); adj0 = ip_get_adjacency (lm, adj_index0); - + ip0 = vlib_buffer_get_current (p0); error0 = IP6_ERROR_NONE; + next0 = IP6_REWRITE_NEXT_DROP; /* Check hop limit */ if (! rewrite_for_locally_received_packets) @@ -2461,24 +2050,29 @@ ip6_rewrite_inline (vlib_main_t * vm, ip0->hop_limit = hop_limit0; - error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0; + if (PREDICT_FALSE(hop_limit0 <= 0)) + { + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. + */ + error0 = IP6_ERROR_TIME_EXPIRED; + next0 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0; + icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + } } - if (rewrite_for_locally_received_packets) - { - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); - + /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; + vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter (&lm->adjacency_counters, - cpu_index, + vlib_increment_combined_counter (&adjacency_counters, + cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); @@ -2488,12 +2082,36 @@ ip6_rewrite_inline (vlib_main_t * vm, ? IP6_ERROR_MTU_EXCEEDED : error0); - p0->current_data -= rw_len0; - p0->current_length += rw_len0; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; - - next0 = (error0 == IP6_ERROR_NONE) ? - adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP; + /* Don't adjust the buffer for hop count issue; icmp-error node + * wants to see the IP headerr */ + if (PREDICT_TRUE(error0 == IP6_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + next0 = adj0[0].rewrite_header.next_index; + + if (PREDICT_FALSE + (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, + tx_sw_if_index0))) + { + p0->current_config_index = + vec_elt (cm->config_index_by_sw_if_index, + tx_sw_if_index0); + vnet_get_config_data (&cm->config_main, + &p0->current_config_index, + &next0, + /* # bytes of config data */ 0); + } + } + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } p0->error = error_node->errors[error0]; @@ -2501,7 +2119,7 @@ ip6_rewrite_inline (vlib_main_t * vm, n_left_from -= 1; to_next += 1; n_left_to_next -= 1; - + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -2523,7 +2141,8 @@ ip6_rewrite_transit (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0); + /* rewrite_for_locally_received_packets */ 0, + /* midchain */ 0); } static uword @@ -2532,9 +2151,32 @@ ip6_rewrite_local (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 1); + /* rewrite_for_locally_received_packets */ 1, + /* midchain */ 0); +} + +static uword +ip6_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0, + /* midchain */ 1); } +VLIB_REGISTER_NODE (ip6_midchain_node) = { + .function = ip6_midchain, + .name = "ip6-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .sibling_of = "ip6-rewrite", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain) + VLIB_REGISTER_NODE (ip6_rewrite_node) = { .function = ip6_rewrite_transit, .name = "ip6-rewrite", @@ -2542,13 +2184,14 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = { .format_trace = format_ip6_rewrite_trace, - .n_next_nodes = 1, + .n_next_nodes = 2, .next_nodes = { [IP6_REWRITE_NEXT_DROP] = "error-drop", + [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error", }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit) +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit); VLIB_REGISTER_NODE (ip6_rewrite_local_node) = { .function = ip6_rewrite_local, @@ -2562,7 +2205,7 @@ VLIB_REGISTER_NODE (ip6_rewrite_local_node) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local) +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local); /* * Hop-by-Hop handling @@ -2640,6 +2283,76 @@ format_ip6_hop_by_hop_trace (u8 * s, va_list * args) return s; } +always_inline u8 ip6_scan_hbh_options ( + vlib_buffer_t * b0, + ip6_header_t *ip0, + ip6_hop_by_hop_header_t *hbh0, + ip6_hop_by_hop_option_t *opt0, + ip6_hop_by_hop_option_t *limit0, + u32 *next0) +{ + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + u8 type0; + u8 error0 = 0; + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->options[type0]) + { + if ((*hm->options[type0])(b0, ip0, opt0) < 0) + { + error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; + return(error0); + } + } + else + { + /* Unrecognized mandatory option, check the two high order bits */ + switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS) + { + case HBH_OPTION_TYPE_SKIP_UNKNOWN: + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + *next0 = IP_LOOKUP_NEXT_DROP; + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + *next0 = IP_LOOKUP_NEXT_ICMP_ERROR; + icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem, + ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0); + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + if (!ip6_address_is_multicast(&ip0->dst_address)) + { + *next0 = IP_LOOKUP_NEXT_ICMP_ERROR; + icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem, + ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0); + } + else + { + *next0 = IP_LOOKUP_NEXT_DROP; + } + break; + } + return(error0); + } + } + opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t)); + } + return(error0); +} + /* * Process the Hop-by-Hop Options header */ @@ -2664,6 +2377,116 @@ ip6_hop_by_hop (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (n_left_from >= 4 && n_left_to_next >= 2) { + u32 bi0, bi1; + vlib_buffer_t * b0, *b1; + u32 next0, next1; + ip6_header_t * ip0, *ip1; + ip6_hop_by_hop_header_t *hbh0, *hbh1; + ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1; + u8 error0 = 0, error1 = 0; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* Speculatively enqueue b0, b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0); + u32 adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj1 = ip_get_adjacency(lm, adj_index1); + + /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */ + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1); + hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1); + opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1); + opt1 = (ip6_hop_by_hop_option_t *)(hbh1+1); + limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3)); + limit1 = (ip6_hop_by_hop_option_t *)((u8 *)hbh1 + ((hbh1->length + 1) << 3)); + + /* + * Basic validity checks + */ + if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) { + error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; + next0 = IP_LOOKUP_NEXT_DROP; + goto outdual; + } + /* Scan the set of h-b-h options, process ones that we understand */ + error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0); + + if ((hbh1->length + 1) << 3 > clib_net_to_host_u16(ip1->payload_length)) { + error1 = IP6_HOP_BY_HOP_ERROR_FORMAT; + next1 = IP_LOOKUP_NEXT_DROP; + goto outdual; + } + /* Scan the set of h-b-h options, process ones that we understand */ + error1 = ip6_scan_hbh_options(b1,ip1,hbh1,opt1,limit1, &next1); + + outdual: + /* Has the classifier flagged this buffer for special treatment? */ + if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP)) + next0 = hm->next_override; + + /* Has the classifier flagged this buffer for special treatment? */ + if ((error1 == 0) && (vnet_buffer(b1)->l2_classify.opaque_index == OI_DECAP)) + next1 = hm->next_override; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) { + ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t)); + u32 trace_len = (hbh0->length + 1) << 3; + t->next_index = next0; + /* Capture the h-b-h option verbatim */ + trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data); + t->trace_len = trace_len; + clib_memcpy(t->option_data, hbh0, trace_len); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) { + ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b1, sizeof (*t)); + u32 trace_len = (hbh1->length + 1) << 3; + t->next_index = next1; + /* Capture the h-b-h option verbatim */ + trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data); + t->trace_len = trace_len; + clib_memcpy(t->option_data, hbh1, trace_len); + } + + } + + b0->error = error_node->errors[error0]; + b1->error = error_node->errors[error1]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0, + bi1,next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; @@ -2671,7 +2494,6 @@ ip6_hop_by_hop (vlib_main_t * vm, ip6_header_t * ip0; ip6_hop_by_hop_header_t *hbh0; ip6_hop_by_hop_option_t *opt0, *limit0; - u8 type0; u8 error0 = 0; /* Speculatively enqueue b0 to the current next frame */ @@ -2703,54 +2525,12 @@ ip6_hop_by_hop (vlib_main_t * vm, } /* Scan the set of h-b-h options, process ones that we understand */ - while (opt0 < limit0) { - type0 = opt0->type; - switch (type0) { - case 0: /* Pad1 */ - opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1; - continue; - case 1: /* PadN */ - break; - default: - if (hm->options[type0]) { - if ((*hm->options[type0])(b0, ip0, opt0) < 0) { - error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; - goto out0; - } - } else { - /* Unrecognized mandatory option, check the two high order bits */ - switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS) { - case HBH_OPTION_TYPE_SKIP_UNKNOWN: - break; - case HBH_OPTION_TYPE_DISCARD_UNKNOWN: - next0 = IP_LOOKUP_NEXT_DROP; - break; - case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP: - next0 = IP_LOOKUP_NEXT_ICMP_ERROR; - icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem, - ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0); - break; - case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST: - if (!ip6_address_is_multicast(&ip0->dst_address)) { - next0 = IP_LOOKUP_NEXT_ICMP_ERROR; - icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem, - ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0); - } else { - next0 = IP_LOOKUP_NEXT_DROP; - } - break; - } - error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; - goto out0; - } - } - opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t)); - } + error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0); out0: /* Has the classifier flagged this buffer for special treatment? */ if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP)) - next0 = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP; + next0 = hm->next_override; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t)); @@ -2784,7 +2564,7 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop) +VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop); static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) @@ -2792,12 +2572,19 @@ ip6_hop_by_hop_init (vlib_main_t * vm) ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; memset(hm->options, 0, sizeof(hm->options)); memset(hm->trace, 0, sizeof(hm->trace)); - + hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP; return (0); } VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); +void ip6_hbh_set_next_override (uword next) +{ + ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main; + + hm->next_override = next; +} + int ip6_hbh_register_option (u8 option, int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt), @@ -2884,13 +2671,18 @@ ip6_lookup_init (vlib_main_t * vm) if (im->lookup_table_size == 0) im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; - - BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table", + + BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash), + "ip6 FIB fwding table", + im->lookup_table_nbuckets, + im->lookup_table_size); + BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + "ip6 FIB non-fwding table", im->lookup_table_nbuckets, im->lookup_table_size); - + /* Create FIB with index 0 and table id of 0. */ - find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID); + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0); { pg_node_t * pn; @@ -2960,28 +2752,47 @@ add_del_ip6_interface_table (vlib_main_t * vm, } { - ip6_main_t * im = &ip6_main; - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID); + u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, + table_id); - if (fib) - { - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; } + done: return error; } -VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { +/*? + * Place the indicated interface into the supplied IPv6 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv6 FIB table, use the command 'show ip6 fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table end up in + * the indicated FIB table. If the IP address is added prior to adding the + * interface to the FIB table, it will NOT be part of the FIB table. Predictable + * but potentially counter-intuitive results occur if you provision interface + * addresses in multiple FIBs. Upon RX, packets will be processed in the last + * IP table ID provisioned. It might be marginally useful to evade source RPF + * drops to put an interface address into multiple FIBs. + * + * @cliexpar + * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = { .path = "set interface ip6 table", .function = add_del_ip6_interface_table, - .short_help = "set interface ip6 table " + .short_help = "set interface ip6 table " }; +/* *INDENT-ON* */ -void +void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, u8 *mac) { @@ -2997,8 +2808,8 @@ ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, ip->as_u8 [15] = mac[5]; } -void -ip6_ethernet_mac_address_from_link_local_address (u8 *mac, +void +ip6_ethernet_mac_address_from_link_local_address (u8 *mac, ip6_address_t *ip) { /* Invert the previously inverted "u" bit */ @@ -3010,7 +2821,7 @@ ip6_ethernet_mac_address_from_link_local_address (u8 *mac, mac[5] = ip->as_u8 [15]; } -static clib_error_t * +static clib_error_t * test_ip6_link_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -3027,15 +2838,28 @@ test_ip6_link_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "Original MAC address: %U", format_ethernet_address, mac); } - + return 0; } +/*? + * This command converts the given MAC Address into an IPv6 link-local + * address. + * + * @cliexpar + * Example of how to create an IPv6 link-local address: + * @cliexstart{test ip6 link 16:d9:e0:91:79:86} + * Link local address: fe80::14d9:e0ff:fe91:7986 + * Original MAC address: 16:d9:e0:91:79:86 + * @cliexend +?*/ +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_link_command, static) = { .path = "test ip6 link", - .function = test_ip6_link_command_fn, + .function = test_ip6_link_command_fn, .short_help = "test ip6 link ", }; +/* *INDENT-ON* */ int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) { @@ -3046,7 +2870,7 @@ int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) if (p == 0) return -1; - fib = vec_elt_at_index (im6->fibs, p[0]); + fib = ip6_fib_get (p[0]); fib->flow_hash_config = flow_hash_config; return 1; @@ -3075,7 +2899,7 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, if (matched == 0) return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - + rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); switch (rv) { @@ -3084,21 +2908,98 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, case -1: return clib_error_return (0, "no such FIB table %d", table_id); - + default: clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); break; } - + return 0; } +/*? + * Configure the set of IPv6 fields used by the flow hash. + * + * @cliexpar + * @parblock + * Example of how to set the flow hash on a given table: + * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto} + * + * Example of display the configured flow hash: + * @cliexstart{show ip6 fib} + * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * @::/0 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]] + * [0] [@2]: dpo-receive + * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto + * @::/0 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @::a:1:1:0:4/126 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]] + * [0] [@4]: ipv6-glean: af_packet0 + * @::a:1:1:0:7/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]] + * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]] + * [0] [@2]: dpo-receive + * fe80::fe:3eff:fe3e:9222/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]] + * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0 + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]] + * [0] [@2]: dpo-receive + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = { .path = "set ip6 flow-hash", - .short_help = - "set ip table flow-hash table src dst sport dport proto reverse", + .short_help = + "set ip6 flow-hash table [src] [dst] [sport] [dport] [proto] [reverse]", .function = set_ip6_flow_hash_command_fn, }; +/* *INDENT-ON* */ static clib_error_t * show_ip6_local_command_fn (vlib_main_t * vm, @@ -3108,7 +3009,7 @@ show_ip6_local_command_fn (vlib_main_t * vm, ip6_main_t * im = &ip6_main; ip_lookup_main_t * lm = &im->lookup_main; int i; - + vlib_cli_output (vm, "Protocols handled by ip6_local"); for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++) { @@ -3120,13 +3021,28 @@ show_ip6_local_command_fn (vlib_main_t * vm, -VLIB_CLI_COMMAND (show_ip_local, static) = { +/*? + * Display the set of protocols handled by the local IPv6 stack. + * + * @cliexpar + * Example of how to display local protocol table: + * @cliexstart{show ip6 local} + * Protocols handled by ip6_local + * 17 + * 43 + * 58 + * 115 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip6_local, static) = { .path = "show ip6 local", .function = show_ip6_local_command_fn, - .short_help = "Show ip6 local protocol table", + .short_help = "show ip6 local", }; +/* *INDENT-ON* */ -int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, +int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index) { vnet_main_t * vnm = vnet_get_main(); @@ -3134,6 +3050,7 @@ int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, ip6_main_t * ipm = &ip6_main; ip_lookup_main_t * lm = &ipm->lookup_main; vnet_classify_main_t * cm = &vnet_classify_main; + ip6_address_t *if_addr; if (pool_is_free_index (im->sw_interfaces, sw_if_index)) return VNET_API_ERROR_NO_MATCHING_INTERFACE; @@ -3144,6 +3061,46 @@ int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index; + if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL); + + if (NULL != if_addr) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *if_addr, + }; + u32 fib_index; + + fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4, + sw_if_index); + + + if (table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP6, + classify_dpo_create(DPO_PROTO_IP6, + table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + else + { + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY); + } + } + return 0; } @@ -3156,20 +3113,20 @@ set_ip6_classify_command_fn (vlib_main_t * vm, int table_index_set = 0; u32 sw_if_index = ~0; int rv; - + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "table-index %d", &table_index)) table_index_set = 1; - else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, vnet_get_main(), &sw_if_index)) ; else break; } - + if (table_index_set == 0) return clib_error_return (0, "classify table-index must be specified"); - + if (sw_if_index == ~0) return clib_error_return (0, "interface / subif must be specified"); @@ -3189,12 +3146,24 @@ set_ip6_classify_command_fn (vlib_main_t * vm, return 0; } +/*? + * Assign a classification table to an interface. The classification + * table is created using the 'classify table' and 'classify session' + * commands. Once the table is create, use this command to filter packets + * on an interface. + * + * @cliexpar + * Example of how to assign a classification table to an interface: + * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1} +?*/ +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = { .path = "set ip6 classify", - .short_help = - "set ip6 classify intfc table-index ", + .short_help = + "set ip6 classify intfc table-index ", .function = set_ip6_classify_command_fn, }; +/* *INDENT-ON* */ static clib_error_t * ip6_config (vlib_main_t * vm, unformat_input_t * input) @@ -3228,3 +3197,57 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input) VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6"); +#define TEST_CODE 1 +#if TEST_CODE > 0 + +static clib_error_t * +set_interface_ip6_output_feature_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 sw_if_index = ~0; + int is_add = 1; + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + + lm->tx_sw_if_has_ip_output_features = + clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add); + + return 0; +} + +/*? + * Enable or disable the output feature on an interface. + * + * @todo Need a more detailed description. + * + * @cliexpar + * Example of how to enable the output feature on an interface: + * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0} + * Example of how to disable the output feature on an interface: + * @cliexcmd{set interface ip6 output feature GigabitEthernet2/0/0 del} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_output_feature, static) = { + .path = "set interface ip6 output feature", + .function = set_interface_ip6_output_feature_command_fn, + .short_help = "set interface ip6 output feature [del]", +}; +/* *INDENT-ON* */ + +#endif /* TEST_CODE */