From: Neale Ranns Date: Sat, 8 Oct 2016 12:03:40 +0000 (+0100) Subject: FIB2.0: Adjacency complete pull model (VPP-487) X-Git-Tag: v17.01-rc0~87 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=b80c536e34b610ca77cd84448754e4bd9c46cf68;p=vpp.git FIB2.0: Adjacency complete pull model (VPP-487) Change the adjacency completion model to pull not push. A complete adjacency has a rewrite string, an incomplete one does not. The re-write string for a peer comes either from a discovery protocol (i.e. ARP/ND) or can be directly derived from the link type (e.g. GRE tunnels). Which method it is, is interface type specific. For each packet type sent on a link to a peer there is a corresponding adjacency. For example, if there is a peer 10.0.0.1 on Eth0 and we need to send to it IPv4 and MPLS packets, there will be two adjacencies; one for the IPv4 and one for the MPLS packets. The adjacencies are thus distinguished by the packets they carry; this is known as the adjacency's 'link-type'. It is not an L3 packet type, since the adjacency can have a link type of Ethernet (for L2 over GRE). The discovery protocols are not aware of all the link types required - only the FIB is. The FIB will create adjacencies as and when they are required, and it is thus desirable to 'pull' from the discovery protocol the re-write required. The alternative (that we have now) is that the discovery protocol pushes (i.e. creates) adjacencies for each link type - this creates more adjacencies than we need. To pull, FIB now requests from the interface-type to 'complete' the adjacency. The interface can then delegate to the discovery protocol (on ethernet links) or directly build the re-write (e.g. on GRE). 
Change-Id: I61451789ae03f26b1012d8d6524007b769b6c6ee Signed-off-by: Neale Ranns --- diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c index 8f9d96efd60..0bdecc6affe 100644 --- a/vnet/vnet/adj/adj.c +++ b/vnet/vnet/adj/adj.c @@ -92,43 +92,50 @@ adj_index_is_special (adj_index_t adj_index) u8 * format_ip_adjacency (u8 * s, va_list * args) { - vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - u32 adj_index = va_arg (*args, u32); - format_ip_adjacency_flags_t fiaf = va_arg (*args, format_ip_adjacency_flags_t); - ip_adjacency_t * adj = adj_get(adj_index); + format_ip_adjacency_flags_t fiaf; + ip_adjacency_t * adj; + u32 adj_index; + + adj_index = va_arg (*args, u32); + fiaf = va_arg (*args, format_ip_adjacency_flags_t); + adj = adj_get(adj_index); - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - s = format (s, "%U", format_adj_nbr, adj_index, 0); - break; - case IP_LOOKUP_NEXT_ARP: - s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0); - break; - case IP_LOOKUP_NEXT_GLEAN: - s = format (s, " %U", - format_vnet_sw_interface_name, - vnm, - vnet_get_sw_interface(vnm, - adj->rewrite_header.sw_if_index)); - break; - - case IP_LOOKUP_NEXT_MIDCHAIN: - s = format (s, "%U", format_adj_midchain, adj_index, 2); - break; - default: - break; - } - s = format (s, " index:%d", adj_index); - - if (fiaf & FORMAT_IP_ADJACENCY_DETAIL) - { - s = format (s, " locks:%d", adj->ia_node.fn_locks); - s = format(s, "\nchildren:\n "); - s = fib_node_children_format(adj->ia_node.fn_children, s); - } - - return s; + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "%U", format_adj_nbr, adj_index, 0); + break; + case IP_LOOKUP_NEXT_ARP: + s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0); + break; + case IP_LOOKUP_NEXT_GLEAN: + s = format (s, "%U", format_adj_glean, adj_index, 0); + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + s = format (s, "%U", format_adj_midchain, adj_index, 2); + break; + default: + break; + } + + 
if (fiaf & FORMAT_IP_ADJACENCY_DETAIL) + { + s = format (s, "\n locks:%d", adj->ia_node.fn_locks); + s = format (s, " node:[%d]:%U", + adj->rewrite_header.node_index, + format_vlib_node_name, vlib_get_main(), + adj->rewrite_header.node_index); + s = format (s, " next:[%d]:%U", + adj->rewrite_header.next_index, + format_vlib_next_node_name, + vlib_get_main(), + adj->rewrite_header.node_index, + adj->rewrite_header.next_index); + s = format(s, "\n children:\n "); + s = fib_node_children_format(adj->ia_node.fn_children, s); + } + + return s; } /* @@ -139,9 +146,13 @@ format_ip_adjacency (u8 * s, va_list * args) static void adj_last_lock_gone (ip_adjacency_t *adj) { + vlib_main_t * vm = vlib_get_main(); + ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children)); ADJ_DBG(adj, "last-lock-gone"); + vlib_worker_thread_barrier_sync (vm); + switch (adj->lookup_next_index) { case IP_LOOKUP_NEXT_MIDCHAIN: @@ -168,6 +179,8 @@ adj_last_lock_gone (ip_adjacency_t *adj) break; } + vlib_worker_thread_barrier_release(vm); + fib_node_deinit(&adj->ia_node); pool_put(adj_pool, adj); } @@ -239,6 +252,49 @@ adj_child_remove (adj_index_t adj_index, sibling_index); } +/** + * @brief Return the link type of the adjacency + */ +vnet_link_t +adj_get_link_type (adj_index_t ai) +{ + const ip_adjacency_t *adj; + + adj = adj_get(ai); + + return (adj->ia_link); +} + +/** + * @brief Return the sw interface index of the adjacency. 
+ */ +u32 +adj_get_sw_if_index (adj_index_t ai) +{ + const ip_adjacency_t *adj; + + adj = adj_get(ai); + + return (adj->rewrite_header.sw_if_index); +} + +/** + * @brief Return the rewrite string of the adjacency + */ +const u8* +adj_get_rewrite (adj_index_t ai) +{ + vnet_rewrite_header_t *rw; + ip_adjacency_t *adj; + + adj = adj_get(ai); + rw = &adj->rewrite_header; + + ASSERT (rw->data_bytes != 0xfefe); + + return (rw->data - rw->data_bytes); +} + static fib_node_t * adj_get_node (fib_node_index_t index) { @@ -289,7 +345,7 @@ adj_module_init (vlib_main_t * vm) adj_midchain_module_init(); /* - * 4 special adjs for v4 and v6 resp. + * one special adj to reserve index 0 */ special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4); @@ -298,10 +354,73 @@ adj_module_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (adj_module_init); +static clib_error_t * +adj_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + adj_index_t ai = ADJ_INDEX_INVALID; + u32 sw_if_index = ~0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &ai)) + ; + else if (unformat (input, "%U", + unformat_vnet_sw_interface, vnet_get_main(), + &sw_if_index)) + ; + else + break; + } + + if (ADJ_INDEX_INVALID != ai) + { + vlib_cli_output (vm, "[@%d] %U", + ai, + format_ip_adjacency, ai, + FORMAT_IP_ADJACENCY_DETAIL); + } + else + { + /* *INDENT-OFF* */ + pool_foreach_index(ai, adj_pool, + ({ + if (~0 != sw_if_index && + sw_if_index == adj_get_sw_if_index(ai)) + { + vlib_cli_output (vm, "[@%d] %U", + ai, + format_ip_adjacency, ai, + FORMAT_IP_ADJACENCY_NONE); + } + })); + /* *INDENT-ON* */ + } + + return 0; +} + +/*? + * Show all adjacencies. 
+ * @cliexpar + * @cliexstart{sh adj} + * [@0] + * [@1] glean: loop0 + * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * @cliexend + ?*/ +VLIB_CLI_COMMAND (adj_show_command, static) = { + .path = "show adj", + .short_help = "show adj [] [interface]", + .function = adj_show, +}; + /* * DEPRECATED: DO NOT USE - * - * Create new block of given number of contiguous adjacencies. */ ip_adjacency_t * ip_add_adjacency (ip_lookup_main_t * lm, diff --git a/vnet/vnet/adj/adj.h b/vnet/vnet/adj/adj.h index 002dab359ab..e85625db7ee 100644 --- a/vnet/vnet/adj/adj.h +++ b/vnet/vnet/adj/adj.h @@ -75,6 +75,28 @@ extern u32 adj_child_add(adj_index_t adj_index, extern void adj_child_remove(adj_index_t adj_index, u32 sibling_index); +/** + * @brief Walk the Adjacencies on a given interface + */ +extern void adj_walk (u32 sw_if_index, + adj_walk_cb_t cb, + void *ctx); + +/** + * @brief Return the link type of the adjacency + */ +extern vnet_link_t adj_get_link_type (adj_index_t ai); + +/** + * @brief Return the sw interface index of the adjacency. + */ +extern u32 adj_get_sw_if_index (adj_index_t ai); + +/** + * @brief Return the rewrite string of the adjacency + */ +extern const u8* adj_get_rewrite (adj_index_t ai); + /** * @brief * The global adjacnecy pool. 
Exposed for fast/inline data-plane access diff --git a/vnet/vnet/adj/adj_glean.c b/vnet/vnet/adj/adj_glean.c index f5d181018b6..290af1fddc8 100644 --- a/vnet/vnet/adj/adj_glean.c +++ b/vnet/vnet/adj/adj_glean.c @@ -188,7 +188,8 @@ format_adj_glean (u8* s, va_list *ap) vnet_main_t * vnm = vnet_get_main(); ip_adjacency_t * adj = adj_get(index); - return (format(s, " glean: %U", + return (format(s, "%U-glean: %U", + format_fib_protocol, adj->ia_nh_proto, format_vnet_sw_interface_name, vnm, vnet_get_sw_interface(vnm, diff --git a/vnet/vnet/adj/adj_glean.h b/vnet/vnet/adj/adj_glean.h index ce3534ecee6..640bd2f91eb 100644 --- a/vnet/vnet/adj/adj_glean.h +++ b/vnet/vnet/adj/adj_glean.h @@ -47,6 +47,11 @@ extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto, u32 sw_if_index, const ip46_address_t *nh_addr); +/** + * @brief Format/display a glean adjacency. + */ +extern u8* format_adj_glean(u8* s, va_list *ap); + /** * @brief * Module initialisation diff --git a/vnet/vnet/adj/adj_internal.h b/vnet/vnet/adj/adj_internal.h index 25a477ad007..f882bff84c0 100644 --- a/vnet/vnet/adj/adj_internal.h +++ b/vnet/vnet/adj/adj_internal.h @@ -41,21 +41,21 @@ #define ADJ_DBG(_e, _fmt, _args...) 
#endif -static inline vlib_node_registration_t* +static inline u32 adj_get_rewrite_node (fib_link_t linkt) { switch (linkt) { case FIB_LINK_IP4: - return (&ip4_rewrite_node); + return (ip4_rewrite_node.index); case FIB_LINK_IP6: - return (&ip6_rewrite_node); + return (ip6_rewrite_node.index); case FIB_LINK_MPLS: - return (&mpls_output_node); + return (mpls_output_node.index); case FIB_LINK_ETHERNET: - return (&adj_l2_rewrite_node); + return (adj_l2_rewrite_node.index); } ASSERT(0); - return (NULL); + return (0); } static inline vnet_l3_packet_type_t @@ -75,17 +75,17 @@ adj_fib_link_2_vnet (fib_link_t linkt) return (0); } -static inline vnet_l3_packet_type_t +static inline vnet_link_t adj_fib_proto_2_nd (fib_protocol_t fp) { switch (fp) { case FIB_PROTOCOL_IP4: - return (VNET_L3_PACKET_TYPE_ARP); + return (VNET_LINK_ARP); case FIB_PROTOCOL_IP6: - return (VNET_L3_PACKET_TYPE_IP6); + return (VNET_LINK_IP6); case FIB_PROTOCOL_MPLS: - return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + return (VNET_LINK_MPLS); } return (0); } @@ -100,6 +100,12 @@ adj_get_index (ip_adjacency_t *adj) return (adj - adj_pool); } +extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, + adj_nbr_rewrite_flag_t flags, + u32 complete_next_index, + u32 next_index, + u8 *rewrite); + extern ip_adjacency_t * adj_alloc(fib_protocol_t proto); extern void adj_nbr_remove(fib_protocol_t nh_proto, diff --git a/vnet/vnet/adj/adj_l2.c b/vnet/vnet/adj/adj_l2.c index cf0f04442cf..2bb28a20129 100644 --- a/vnet/vnet/adj/adj_l2.c +++ b/vnet/vnet/adj/adj_l2.c @@ -32,11 +32,10 @@ format_adj_l2_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *); - vnet_main_t * vnm = vnet_get_main(); s = format (s, "adj-idx %d : %U", t->adj_index, - format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE); + format_ip_adjacency, t->adj_index, 
FORMAT_IP_ADJACENCY_NONE); return s; } diff --git a/vnet/vnet/adj/adj_midchain.c b/vnet/vnet/adj/adj_midchain.c index c40d4e8bca5..f42e3a9072a 100644 --- a/vnet/vnet/adj/adj_midchain.c +++ b/vnet/vnet/adj/adj_midchain.c @@ -130,7 +130,7 @@ format_adj_midchain_tx_trace (u8 * s, va_list * args) adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*); s = format(s, "adj-midchain:[%d]:%U", tr->ai, - format_ip_adjacency, vnet_get_main(), tr->ai, + format_ip_adjacency, tr->ai, FORMAT_IP_ADJACENCY_NONE); return (s); @@ -294,7 +294,17 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, ASSERT(ADJ_INDEX_INVALID != adj_index); adj = adj_get(adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN; + + /* + * one time only update. since we don't support chainging the tunnel + * src,dst, this is all we need. + */ + ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP); + /* + * tunnels can always provide a rewrite. + */ + ASSERT(NULL != rewrite); + adj->sub_type.midchain.fixup_func = fixup; cm = adj_midchain_get_cofing_for_link_type(adj); @@ -334,69 +344,26 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, cm->config_index_by_sw_if_index[adj->rewrite_header.sw_if_index] = ci; - if (NULL != rewrite) - { - /* - * new rewrite provided. - * use a dummy rewrite header to get the interface to print into. - */ - ip_adjacency_t dummy; - dpo_id_t tmp = DPO_NULL; - - vnet_rewrite_for_tunnel(vnet_get_main(), - adj->rewrite_header.sw_if_index, - adj_get_midchain_node(adj->ia_link), - adj->sub_type.midchain.tx_function_node, - &dummy.rewrite_header, - rewrite, - vec_len(rewrite)); - - /* - * this is an update of an existing rewrite. - * packets are in flight. we'll need to briefly stack on the drop DPO - * whilst the rewrite is written, so any packets that see the partial update - * are binned. - */ - if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo)) - { - /* - * not stacked yet. 
stack on the drop - */ - dpo_stack(DPO_ADJACENCY_MIDCHAIN, - fib_link_to_dpo_proto(adj->ia_link), - &adj->sub_type.midchain.next_dpo, - drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); - } - - dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo); - dpo_stack(DPO_ADJACENCY_MIDCHAIN, - fib_link_to_dpo_proto(adj->ia_link), - &adj->sub_type.midchain.next_dpo, - drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); - - CLIB_MEMORY_BARRIER(); - clib_memcpy(&adj->rewrite_header, - &dummy.rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE); - - CLIB_MEMORY_BARRIER(); + /* + * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx. + * The graph arc used/created here is from the midchain-tx node to the + * child's registered node. This is because post adj processing the next + * node are any output features, then the midchain-tx. from there we + * need to get to the stacked child's node. + */ + dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); - /* - * The graph arc used/created here is from the midchain-tx node to the - * child's registered node. This is because post adj processing the next - * node are any output features, then the midchain-tx. from there we - * need to get to the stacked child's node. - */ - dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, - &adj->sub_type.midchain.next_dpo, - &tmp); - dpo_reset(&tmp); - } - else - { - ASSERT(0); - } + /* + * update the rewirte with the workers paused. + */ + adj_nbr_update_rewrite_internal(adj, + IP_LOOKUP_NEXT_MIDCHAIN, + adj_get_midchain_node(adj->ia_link), + adj->sub_type.midchain.tx_function_node, + rewrite); /* * time for walkies fido. 
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c index 23e40a6850c..5351520e7ed 100644 --- a/vnet/vnet/adj/adj_nbr.c +++ b/vnet/vnet/adj/adj_nbr.c @@ -115,152 +115,19 @@ adj_nbr_find (fib_protocol_t nh_proto, } } -static inline vlib_node_registration_t* +static inline u32 adj_get_nd_node (fib_protocol_t proto) { switch (proto) { case FIB_PROTOCOL_IP4: - return (&ip4_arp_node); + return (ip4_arp_node.index); case FIB_PROTOCOL_IP6: - return (&ip6_discover_neighbor_node); + return (ip6_discover_neighbor_node.index); case FIB_PROTOCOL_MPLS: break; } ASSERT(0); - return (NULL); -} - -static void -adj_ip4_nbr_probe (ip_adjacency_t *adj) -{ - vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; - ip_interface_address_t * ia; - ethernet_arp_header_t * h; - vnet_hw_interface_t * hi; - vnet_sw_interface_t * si; - ip4_address_t * src; - vlib_buffer_t * b; - vlib_main_t * vm; - u32 bi = 0; - - vm = vlib_get_main(); - - si = vnet_get_sw_interface (vnm, - adj->rewrite_header.sw_if_index); - - if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) - { - return; - } - - src = - ip4_interface_address_matching_destination(im, - &adj->sub_type.nbr.next_hop.ip4, - adj->rewrite_header.sw_if_index, - &ia); - if (! src) - { - return; - } - - h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi); - - hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); - - clib_memcpy (h->ip4_over_ethernet[0].ethernet, - hi->hw_address, - sizeof (h->ip4_over_ethernet[0].ethernet)); - - h->ip4_over_ethernet[0].ip4 = src[0]; - h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4; - - b = vlib_get_buffer (vm, bi); - vnet_buffer (b)->sw_if_index[VLIB_RX] = - vnet_buffer (b)->sw_if_index[VLIB_TX] = - adj->rewrite_header.sw_if_index; - - /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ - vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); - vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); - - { - vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); - u32 * to_next = vlib_frame_vector_args (f); - to_next[0] = bi; - f->n_vectors = 1; - vlib_put_frame_to_node (vm, hi->output_node_index, f); - } -} - -static void -adj_ip6_nbr_probe (ip_adjacency_t *adj) -{ - icmp6_neighbor_solicitation_header_t * h; - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; - ip_interface_address_t * ia; - ip6_address_t * dst, *src; - vnet_hw_interface_t * hi; - vnet_sw_interface_t * si; - vlib_buffer_t * b; - int bogus_length; - vlib_main_t * vm; - u32 bi = 0; - - vm = vlib_get_main(); - - si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index); - dst = &adj->sub_type.nbr.next_hop.ip6; - - if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) - { - return; - } - src = ip6_interface_address_matching_destination(im, dst, - adj->rewrite_header.sw_if_index, - &ia); - if (! src) - { - return; - } - - h = vlib_packet_template_get_packet(vm, - &im->discover_neighbor_packet_template, - &bi); - - hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index); - - h->ip.dst_address.as_u8[13] = dst->as_u8[13]; - h->ip.dst_address.as_u8[14] = dst->as_u8[14]; - h->ip.dst_address.as_u8[15] = dst->as_u8[15]; - h->ip.src_address = src[0]; - h->neighbor.target_address = dst[0]; - - clib_memcpy (h->link_layer_option.ethernet_address, - hi->hw_address, - vec_len(hi->hw_address)); - - h->neighbor.icmp.checksum = - ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length); - ASSERT(bogus_length == 0); - - b = vlib_get_buffer (vm, bi); - vnet_buffer (b)->sw_if_index[VLIB_RX] = - vnet_buffer (b)->sw_if_index[VLIB_TX] = - adj->rewrite_header.sw_if_index; - - /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ - vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t)); - vlib_buffer_advance(b, -adj->rewrite_header.data_bytes); - - { - vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index); - u32 * to_next = vlib_frame_vector_args(f); - to_next[0] = bi; - f->n_vectors = 1; - vlib_put_frame_to_node(vm, hi->output_node_index, f); - } + return (ip4_arp_node.index); } static ip_adjacency_t* @@ -285,6 +152,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto, adj->sub_type.nbr.next_hop = *nh_addr; adj->ia_link = link_type; adj->ia_nh_proto = nh_proto; + adj->rewrite_header.sw_if_index = sw_if_index; memset(&adj->sub_type.midchain.next_dpo, 0, sizeof(adj->sub_type.midchain.next_dpo)); @@ -300,8 +168,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto, * - the Next-hops protocol (i.e. v4 or v6) * - the address of the next-hop * - the interface the next-hop is reachable through - * - fib_index; this is broken. i will fix it. - * the adj lookup currently occurs in the FIB. */ adj_index_t adj_nbr_add_or_lock (fib_protocol_t nh_proto, @@ -316,55 +182,31 @@ adj_nbr_add_or_lock (fib_protocol_t nh_proto, if (ADJ_INDEX_INVALID == adj_index) { + vnet_main_t *vnm; + + vnm = vnet_get_main(); adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index); + adj_index = adj_get_index(adj); + adj_lock(adj_index); + + vnet_rewrite_init(vnm, sw_if_index, + adj_get_nd_node(nh_proto), + vnet_tx_node_index_for_sw_interface(vnm, sw_if_index), + &adj->rewrite_header); /* - * If there is no next-hop, this is the 'auto-adj' used on p2p - * links instead of a glean. + * we need a rewrite where the destination IP address is converted + * to the appropriate link-layer address. This is interface specific. + * So ask the interface to do it. 
*/ - if (ip46_address_is_zero(nh_addr)) - { - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - - vnet_rewrite_for_sw_interface(vnet_get_main(), - adj_fib_link_2_vnet(link_type), - sw_if_index, - adj_get_rewrite_node(link_type)->index, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); - } - else - { - vnet_rewrite_for_sw_interface(vnet_get_main(), - adj_fib_proto_2_nd(nh_proto), - sw_if_index, - adj_get_nd_node(nh_proto)->index, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); - - switch (nh_proto) - { - case FIB_PROTOCOL_IP4: - adj_ip4_nbr_probe(adj); - break; - case FIB_PROTOCOL_IP6: - adj_ip6_nbr_probe(adj); - break; - case FIB_PROTOCOL_MPLS: - break; - } - } + vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index); } else { - adj = adj_get(adj_index); + adj_lock(adj_index); } - adj_lock(adj_get_index(adj)); - - return (adj_get_index(adj)); + return (adj_index); } adj_index_t @@ -390,7 +232,9 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto, } adj_lock(adj_get_index(adj)); - adj_nbr_update_rewrite(adj_get_index(adj), rewrite); + adj_nbr_update_rewrite(adj_get_index(adj), + ADJ_NBR_REWRITE_FLAG_COMPLETE, + rewrite); return (adj_get_index(adj)); } @@ -404,86 +248,134 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto, */ void adj_nbr_update_rewrite (adj_index_t adj_index, + adj_nbr_rewrite_flag_t flags, u8 *rewrite) { ip_adjacency_t *adj; + u32 old_next; ASSERT(ADJ_INDEX_INVALID != adj_index); adj = adj_get(adj_index); + old_next = adj->lookup_next_index; - if (NULL != rewrite) + if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE) { /* - * new rewrite provided. - * use a dummy rewrite header to get the interface to print into. 
- */ - ip_adjacency_t dummy; - - vnet_rewrite_for_sw_interface(vnet_get_main(), - adj_fib_link_2_vnet(adj->ia_link), - adj->rewrite_header.sw_if_index, - adj_get_rewrite_node(adj->ia_link)->index, - rewrite, - &dummy.rewrite_header, - sizeof (dummy.rewrite_data)); - - if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index) - { - /* - * this is an update of an existing rewrite. - * we can't just paste in the new rewrite as that is not atomic. - * So we briefly swap the ADJ to ARP type, paste, then swap back. - */ - adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; - CLIB_MEMORY_BARRIER(); - } - /* - * else - * this is the first time the rewrite is added. - * paste it on then swap the next type. + * update the adj's rewrite string and build the arc + * from the rewrite node to the interface's TX node */ - clib_memcpy(&adj->rewrite_header, - &dummy.rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE, + adj_get_rewrite_node(adj->ia_link), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); } else + { + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP, + adj_get_nd_node(adj->ia_nh_proto), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); + } + + if (old_next != adj->lookup_next_index) { /* - * clear the rewrite. + * time for walkies fido. + * The link type MPLS Adj never has children. So if it is this adj + * that is updated, we need to walk from its IP sibling. 
*/ - adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; - CLIB_MEMORY_BARRIER(); + if (FIB_LINK_MPLS == adj->ia_link) + { + adj_index = adj_nbr_find(adj->ia_nh_proto, + fib_proto_to_link(adj->ia_nh_proto), + &adj->sub_type.nbr.next_hop, + adj->rewrite_header.sw_if_index); + + ASSERT(ADJ_INDEX_INVALID != adj_index); + } - adj->rewrite_header.data_bytes = 0; + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE, + /* + * This walk only needs to go back one level, but there is no control + * here. the first receiving fib_entry_t will quash the walk + */ + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx); } +} + +/** + * adj_nbr_update_rewrite_internal + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewirte is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, + u32 adj_next_index, + u32 this_node, + u32 next_node, + u8 *rewrite) +{ + vlib_main_t * vm = vlib_get_main(); /* - * time for walkies fido. - * The link type MPLS Adj never has children. So if it is this adj - * that is updated, we need to walk from its IP sibling. + * Updating a rewrite string is not atomic; + * - the rewrite string is too long to write in one instruction + * - when swapping from incomplete to complete, we also need to update + * the VLIB graph next-index. + * ideally we would only want to suspend forwarding via this adj whilst we + * do this, but we do not have that level of granularity - it's suspend all + * worker threads or nothing. + * The other chioces are: + * - to mark the adj down and back walk so child load-balances drop this adj + * from the set. + * - update the next_node index of this adj to point to error-drop + * both of which will mean for MAC change we will drop for this adj + * which is not acceptable. + * So the pause all threads is preferable. 
We don't update MAC addresses often + * so it's no big deal. */ - if (FIB_LINK_MPLS == adj->ia_link) - { - adj_index = adj_nbr_find(adj->ia_nh_proto, - fib_proto_to_link(adj->ia_nh_proto), - &adj->sub_type.nbr.next_hop, - adj->rewrite_header.sw_if_index); + vlib_worker_thread_barrier_sync(vm); - ASSERT(ADJ_INDEX_INVALID != adj_index); - } + adj->lookup_next_index = adj_next_index; - fib_node_back_walk_ctx_t bw_ctx = { - .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE, + if (NULL != rewrite) + { /* - * This walk only needs to go back one level, but there is no control here. - * the first receiving fib_entry_t will quash the walk + * new rewrite provided. + * fill in the adj's rewrite string, and build the VLIB graph arc. */ - }; + vnet_rewrite_set_data_internal(&adj->rewrite_header, + sizeof(adj->rewrite_data), + rewrite, + vec_len(rewrite)); - fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx); + adj->rewrite_header.node_index = this_node; + adj->rewrite_header.next_index = vlib_node_add_next (vlib_get_main(), + this_node, + next_node); + + vec_free(rewrite); + } + else + { + vnet_rewrite_clear_data_internal(&adj->rewrite_header, + sizeof(adj->rewrite_data)); + } + + /* + * done with the rewirte update - let the workers loose. + */ + vlib_worker_thread_barrier_release(vm); } typedef struct adj_db_count_ctx_t_ { @@ -523,6 +415,152 @@ adj_nbr_db_size (void) return (ctx.count); } +/** + * @brief Context for a walk of the adjacency neighbour DB + */ +typedef struct adj_walk_ctx_t_ +{ + adj_walk_cb_t awc_cb; + void *awc_ctx; +} adj_walk_ctx_t; + +static void +adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + adj_walk_ctx_t *ctx = arg; + + // FIXME: can't stop early... 
+ ctx->awc_cb(kvp->value, ctx->awc_ctx); +} + +void +adj_nbr_walk (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + adj_walk_cb_t cb, + void *ctx) +{ + if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index)) + return; + + adj_walk_ctx_t awc = { + .awc_ctx = ctx, + .awc_cb = cb, + }; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[adj_nh_proto][sw_if_index], + adj_nbr_walk_cb, + &awc); +} + +/** + * @brief Context for a walk of the adjacency neighbour DB + */ +typedef struct adj_walk_nh_ctx_t_ +{ + adj_walk_cb_t awc_cb; + void *awc_ctx; + const ip46_address_t *awc_nh; +} adj_walk_nh_ctx_t; + +static void +adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + ip_adjacency_t *adj; + adj_walk_nh_ctx_t *ctx = arg; + + adj = adj_get(kvp->value); + + if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh)) + ctx->awc_cb(kvp->value, ctx->awc_ctx); +} + +/** + * @brief Walk adjacencies on a link with a given v4 next-hop. + * that is visit the adjacencies with different link types. + */ +void +adj_nbr_walk_nh4 (u32 sw_if_index, + const ip4_address_t *addr, + adj_walk_cb_t cb, + void *ctx) +{ + if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index)) + return; + + ip46_address_t nh = { + .ip4 = *addr, + }; + + adj_walk_nh_ctx_t awc = { + .awc_ctx = ctx, + .awc_cb = cb, + .awc_nh = &nh, + }; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index], + adj_nbr_walk_nh_cb, + &awc); +} + +/** + * @brief Walk adjacencies on a link with a given v6 next-hop. + * that is visit the adjacencies with different link types. 
+ */ +void +adj_nbr_walk_nh6 (u32 sw_if_index, + const ip6_address_t *addr, + adj_walk_cb_t cb, + void *ctx) +{ + if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index)) + return; + + ip46_address_t nh = { + .ip6 = *addr, + }; + + adj_walk_nh_ctx_t awc = { + .awc_ctx = ctx, + .awc_cb = cb, + .awc_nh = &nh, + }; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index], + adj_nbr_walk_nh_cb, + &awc); +} + +/** + * @brief Walk adjacencies on a link with a given next-hop. + * that is visit the adjacencies with different link types. + */ +void +adj_nbr_walk_nh (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + const ip46_address_t *nh, + adj_walk_cb_t cb, + void *ctx) +{ + if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index)) + return; + + adj_walk_nh_ctx_t awc = { + .awc_ctx = ctx, + .awc_cb = cb, + .awc_nh = nh, + }; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[adj_nh_proto][sw_if_index], + adj_nbr_walk_nh_cb, + &awc); +} + /** * Context for the state change walk of the DB */ @@ -534,8 +572,8 @@ typedef struct adj_nbr_interface_state_change_ctx_t_ int flags; } adj_nbr_interface_state_change_ctx_t; -static void -adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp, +static adj_walk_rc_t +adj_nbr_interface_state_change_one (adj_index_t ai, void *arg) { /* @@ -550,7 +588,9 @@ adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp, FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), }; - fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx); + fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx); + + return (ADJ_WALK_RC_CONTINUE); } static clib_error_t * @@ -565,17 +605,13 @@ adj_nbr_interface_state_change (vnet_main_t * vnm, */ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) { - if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) - continue; - adj_nbr_interface_state_change_ctx_t ctx = { .flags = flags, }; - BV(clib_bihash_foreach_key_value_pair) ( - adj_nbr_tables[proto][sw_if_index], - adj_nbr_interface_state_change_one, 
- &ctx); + adj_nbr_walk(sw_if_index, proto, + adj_nbr_interface_state_change_one, + &ctx); } return (NULL); @@ -583,8 +619,8 @@ adj_nbr_interface_state_change (vnet_main_t * vnm, VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change); -static void -adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp, +static adj_walk_rc_t +adj_nbr_interface_delete_one (adj_index_t ai, void *arg) { /* @@ -595,7 +631,9 @@ adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp, .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, }; - fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx); + fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx); + + return (ADJ_WALK_RC_CONTINUE); } /** @@ -630,13 +668,9 @@ adj_nbr_interface_add_del (vnet_main_t * vnm, for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) { - if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) - continue; - - BV(clib_bihash_foreach_key_value_pair) ( - adj_nbr_tables[proto][sw_if_index], - adj_nbr_interface_delete_one, - NULL); + adj_nbr_walk(sw_if_index, proto, + adj_nbr_interface_delete_one, + NULL); } return (NULL); @@ -646,15 +680,16 @@ adj_nbr_interface_add_del (vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del); -static void -adj_nbr_show_one (BVT(clib_bihash_kv) * kvp, +static adj_walk_rc_t +adj_nbr_show_one (adj_index_t ai, void *arg) { vlib_cli_output (arg, "[@%d] %U", - kvp->value, - format_ip_adjacency, - vnet_get_main(), kvp->value, + ai, + format_ip_adjacency, ai, FORMAT_IP_ADJACENCY_NONE); + + return (ADJ_WALK_RC_CONTINUE); } static clib_error_t * @@ -663,11 +698,16 @@ adj_nbr_show (vlib_main_t * vm, vlib_cli_command_t * cmd) { adj_index_t ai = ADJ_INDEX_INVALID; + u32 sw_if_index = ~0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "%d", &ai)) ; + else if (unformat (input, "%U", + unformat_vnet_sw_interface, vnet_get_main(), + &sw_if_index)) + ; else break; } @@ -676,28 +716,31 @@ adj_nbr_show (vlib_main_t * vm, { 
vlib_cli_output (vm, "[@%d] %U", ai, - - format_ip_adjacency, - vnet_get_main(), ai, + format_ip_adjacency, ai, FORMAT_IP_ADJACENCY_DETAIL); } - else + else if (~0 != sw_if_index) { fib_protocol_t proto; for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) { - u32 sw_if_index; + adj_nbr_walk(sw_if_index, proto, + adj_nbr_show_one, + vm); + } + } + else + { + fib_protocol_t proto; + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { vec_foreach_index(sw_if_index, adj_nbr_tables[proto]) { - if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) - continue; - - BV(clib_bihash_foreach_key_value_pair) ( - adj_nbr_tables[proto][sw_if_index], - adj_nbr_show_one, - vm); + adj_nbr_walk(sw_if_index, proto, + adj_nbr_show_one, + vm); } } } @@ -705,12 +748,37 @@ adj_nbr_show (vlib_main_t * vm, return 0; } +/*? + * Show all neighbour adjacencies. + * @cliexpar + * @cliexstart{sh adj nbr} + * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * @cliexend + ?*/ VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { .path = "show adj nbr", - .short_help = "show adj nbr [] [sw_if_index ]", + .short_help = "show adj nbr [] [interface]", .function = adj_nbr_show, }; +static ip46_type_t +adj_proto_to_46 (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (IP46_TYPE_IP4); + case FIB_PROTOCOL_IP6: + return (IP46_TYPE_IP6); + default: + return (IP46_TYPE_IP4); + } + return (IP46_TYPE_IP4); +} + u8* format_adj_nbr_incomplete (u8* s, va_list *ap) { @@ -721,7 +789,8 @@ format_adj_nbr_incomplete (u8* s, va_list *ap) s = format (s, "arp-%U", format_fib_link, adj->ia_link); s = format (s, ": via %U", - format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY); + 
format_ip46_address, &adj->sub_type.nbr.next_hop, + adj_proto_to_46(adj->ia_nh_proto)); s = format (s, " %U", format_vnet_sw_interface_name, vnm, @@ -741,7 +810,8 @@ format_adj_nbr (u8* s, va_list *ap) s = format (s, "%U", format_fib_link, adj->ia_link); s = format (s, " via %U ", - format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY); + format_ip46_address, &adj->sub_type.nbr.next_hop, + adj_proto_to_46(adj->ia_nh_proto)); s = format (s, "%U", format_vnet_rewrite, vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0); diff --git a/vnet/vnet/adj/adj_nbr.h b/vnet/vnet/adj/adj_nbr.h index 331423bd036..39663b603c6 100644 --- a/vnet/vnet/adj/adj_nbr.h +++ b/vnet/vnet/adj/adj_nbr.h @@ -75,6 +75,28 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto, const ip46_address_t *nh_addr, u32 sw_if_index, u8 *rewrite); +/** + * @brief When adding a rewrite to an adjacency these are flags that + * apply to that rewrite + */ +typedef enum adj_nbr_rewrite_flag_t_ +{ + ADJ_NBR_REWRITE_FLAG_NONE, + + /** + * An indication that the rewrite is incomplete, i.e. that it describes the + * ARP/ND rewrite when probing. + */ + ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE, + + /** + * An indication that the rewrite is complete, i.e. that it fully describes + * the link-layer addressing for the destination. + * The opposite of this is an incomplete rewrite that describes the ARP/ND + * rewrite when probing.
+ */ + ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0), +} adj_nbr_rewrite_flag_t; /** * @brief @@ -87,6 +109,7 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto, * The new rewrite */ extern void adj_nbr_update_rewrite(adj_index_t adj_index, + adj_nbr_rewrite_flag_t flags, u8 *rewrite); /** @@ -101,6 +124,43 @@ extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap); */ extern u8* format_adj_nbr(u8* s, va_list *ap); +/** + * @brief Walk the neighbour Adjacencies on a given interface + */ +extern void adj_nbr_walk (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + adj_walk_cb_t cb, + void *ctx); +/** + * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop + */ +void +adj_nbr_walk_nh (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + const ip46_address_t *nh, + adj_walk_cb_t cb, + void *ctx); + +/** + * @brief Walk adjacencies on a link with a given v4 next-hop. + * that is visit the adjacencies with different link types. + */ +void +adj_nbr_walk_nh4 (u32 sw_if_index, + const ip4_address_t *addr, + adj_walk_cb_t cb, + void *ctx); + +/** + * @brief Walk adjacencies on a link with a given v6 next-hop. + * that is visit the adjacencies with different link types. 
+ */ +void +adj_nbr_walk_nh6 (u32 sw_if_index, + const ip6_address_t *addr, + adj_walk_cb_t cb, + void *ctx); + /** * @brief * Module initialisation diff --git a/vnet/vnet/adj/adj_rewrite.c b/vnet/vnet/adj/adj_rewrite.c index eb93f6a4377..046fff44731 100644 --- a/vnet/vnet/adj/adj_rewrite.c +++ b/vnet/vnet/adj/adj_rewrite.c @@ -32,15 +32,17 @@ adj_rewrite_add_and_lock (fib_protocol_t nh_proto, adj = adj_alloc(nh_proto); adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop)); adj->ia_link = link_type; + adj->ia_nh_proto = nh_proto; adj->rewrite_header.sw_if_index = sw_if_index; ASSERT(NULL != rewrite); vnet_rewrite_for_sw_interface(vnet_get_main(), - adj_fib_link_2_vnet(link_type), + link_type, adj->rewrite_header.sw_if_index, - adj_get_rewrite_node(link_type)->index, + adj_get_rewrite_node(link_type), rewrite, &adj->rewrite_header, sizeof (adj->rewrite_data)); diff --git a/vnet/vnet/adj/adj_types.h b/vnet/vnet/adj/adj_types.h index a7234663d29..cf90c08418d 100644 --- a/vnet/vnet/adj/adj_types.h +++ b/vnet/vnet/adj/adj_types.h @@ -35,4 +35,19 @@ typedef u32 adj_index_t; */ #define ADJ_INDEX_INVALID ((u32)~0) +/** + * @brief return codes from a adjacency walker callback function + */ +typedef enum adj_walk_rc_t_ +{ + ADJ_WALK_RC_STOP, + ADJ_WALK_RC_CONTINUE, +} adj_walk_rc_t; + +/** + * @brief Call back function when walking adjacencies + */ +typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai, + void *ctx); + #endif diff --git a/vnet/vnet/dhcp/client.c b/vnet/vnet/dhcp/client.c index ffe6e8dab7c..f555f19ef12 100644 --- a/vnet/vnet/dhcp/client.c +++ b/vnet/vnet/dhcp/client.c @@ -44,23 +44,15 @@ dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c) c->subnet_mask_width, 1 /*is_del*/); } -static void set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c) +static void +set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c) { - vnet_main_t * vnm = dcm->vnet_main; - 
vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index); - vnet_hw_interface_class_t * hc = - vnet_get_hw_interface_class (vnm, hw->hw_class_index); - u32 n_rw; - /* Acquire the L2 rewrite string for the indicated sw_if_index */ - vec_validate (c->l2_rewrite, 32); - ASSERT (hc->set_rewrite); - n_rw = hc->set_rewrite (dcm->vnet_main, c->sw_if_index, - VNET_L3_PACKET_TYPE_IP4, - 0 /* broadcast */, c->l2_rewrite, - vec_len(c->l2_rewrite)); - - _vec_len (c->l2_rewrite) = n_rw; + c->l2_rewrite = vnet_build_rewrite_for_sw_interface( + dcm->vnet_main, + c->sw_if_index, + VNET_LINK_IP4, + 0 /* broadcast */); } /* diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c index 645ff86c274..eeaac4d3808 100644 --- a/vnet/vnet/ethernet/arp.c +++ b/vnet/vnet/ethernet/arp.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include /** @@ -48,33 +48,23 @@ typedef struct #define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1) u64 cpu_time_last_updated; - adj_index_t adj_index[FIB_LINK_NUM]; -} ethernet_arp_ip4_entry_t; -/** - * @brief administrative and operational state falgs on an interface - */ -typedef enum ethernet_arp_interface_flags_t_ -{ - ETHERNET_ARP_INTERFACE_UP = (0 << 1), - ETHERNET_ARP_INTERFACE_MPLS_ENABLE = (1 << 0), -} ethernet_arp_interface_flags_t; + /** + * The index of the adj-fib entry created + */ + fib_node_index_t fib_entry_index; +} ethernet_arp_ip4_entry_t; /** * @brief Per-interface ARP configuration and state */ typedef struct ethernet_arp_interface_t_ { - /** - * Hash table of ARP entries. - * Since this hash table is per-interface, the key is only the IPv4 address. - */ + /** + * Hash table of ARP entries. + * Since this hash table is per-interface, the key is only the IPv4 address. 
+ */ uword *arp_entries; - - /** - * Flags for administrative and operational state - */ - ethernet_arp_interface_flags_t flags; } ethernet_arp_interface_t; typedef struct @@ -123,14 +113,6 @@ typedef struct static ethernet_arp_main_t ethernet_arp_main; - -typedef enum arp_ether_type_t_ -{ - ARP_ETHER_TYPE_IP4 = (1 << 0), - ARP_ETHER_TYPE_MPLS = (1 << 1), -} arp_ether_type_t; -#define ARP_ETHER_TYPE_BOTH (ARP_ETHER_TYPE_MPLS | ARP_ETHER_TYPE_IP4) - typedef struct { u32 sw_if_index; @@ -140,7 +122,6 @@ typedef struct #define ETHERNET_ARP_ARGS_REMOVE (1<<0) #define ETHERNET_ARP_ARGS_FLUSH (1<<1) #define ETHERNET_ARP_ARGS_POPULATE (1<<2) - arp_ether_type_t ether_type; } vnet_arp_set_ip4_over_ethernet_rpc_args_t; static void @@ -339,68 +320,170 @@ format_arp_term_input_trace (u8 * s, va_list * va) } static void -arp_mk_complete (ethernet_arp_interface_t * eai, - ethernet_arp_ip4_entry_t * e, arp_ether_type_t et) +arp_nbr_probe (ip_adjacency_t * adj) { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr = { - .ip4 = e->ip4_address, - }, - }; - u32 fib_index; + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + ip_interface_address_t *ia; + ethernet_arp_header_t *h; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + ip4_address_t *src; + vlib_buffer_t *b; + vlib_main_t *vm; + u32 bi = 0; - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + vm = vlib_get_main (); - if (et & ARP_ETHER_TYPE_IP4) + si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) { - if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_IP4]) - { - e->adj_index[FIB_LINK_IP4] = - adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4, - FIB_LINK_IP4, - &pfx.fp_addr, - e->sw_if_index, - e->ethernet_address); - ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]); - - fib_table_entry_update_one_path (fib_index, - &pfx, - FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - 
FIB_PROTOCOL_IP4, - &pfx.fp_addr, - e->sw_if_index, - ~0, - 1, - MPLS_LABEL_INVALID, - FIB_ROUTE_PATH_FLAG_NONE); - } - else - { - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], - e->ethernet_address); - } + return; } - if ((et & ARP_ETHER_TYPE_MPLS) && - eai->flags & ETHERNET_ARP_INTERFACE_MPLS_ENABLE) + + src = + ip4_interface_address_matching_destination (im, + &adj->sub_type.nbr.next_hop. + ip4, + adj->rewrite_header. + sw_if_index, &ia); + if (!src) { - if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_MPLS]) - { - e->adj_index[FIB_LINK_MPLS] = - adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4, - FIB_LINK_MPLS, - &pfx.fp_addr, - e->sw_if_index, - e->ethernet_address); - ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]); - } - else - { - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], - e->ethernet_address); - } + return; + } + + h = + vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + + clib_memcpy (h->ip4_over_ethernet[0].ethernet, + hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet)); + + h->ip4_over_ethernet[0].ip4 = src[0]; + h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4; + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +static void +arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e) +{ + adj_nbr_update_rewrite + (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, + ethernet_build_rewrite (vnet_get_main (), + e->sw_if_index, + adj_get_link_type (ai), e->ethernet_address)); +} + +static void +arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e) +{ + adj_nbr_update_rewrite + (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnet_get_main (), + e->sw_if_index, + VNET_LINK_ARP, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); +} + +static ethernet_arp_ip4_entry_t * +arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_ip4_entry_t *e = NULL; + uword *p; + + if (NULL != eai->arp_entries) + { + p = hash_get (eai->arp_entries, addr->as_u32); + if (!p) + return (NULL); + + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + } + + return (e); +} + +static adj_walk_rc_t +arp_mk_complete_walk (adj_index_t ai, void *ctx) +{ + ethernet_arp_ip4_entry_t *e = ctx; + + arp_mk_complete (ai, e); + + return (ADJ_WALK_RC_CONTINUE); +} + +static adj_walk_rc_t +arp_mk_incomplete_walk (adj_index_t ai, void *ctx) +{ + ethernet_arp_ip4_entry_t *e = ctx; + + arp_mk_incomplete (ai, e); + + return (ADJ_WALK_RC_CONTINUE); +} + +void +arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_interface_t *arp_int; + ethernet_arp_ip4_entry_t *e; + ip_adjacency_t *adj; + + adj = adj_get (ai); + + vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index); + arp_int =
&am->ethernet_arp_by_sw_if_index[sw_if_index]; + e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4); + + if (NULL != e) + { + adj_nbr_walk_nh4 (sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); + } + else + { + /* + * no matching ARP entry. + * construct the rewrite required for an ARP packet, and stick + * that in the adj's pipe to smoke. + */ + adj_nbr_update_rewrite (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnm, + sw_if_index, + VNET_LINK_ARP, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it may + * want to forward traffic sometime soon. Let's send a speculative ARP. + * just one. If we were to do periodically that wouldn't be bad either, + * but that's more code than i'm prepared to write at this time for + * relatively little reward. + */ + arp_nbr_probe (adj); } } @@ -417,7 +500,6 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, uword *p; pending_resolution_t *pr, *mc; ethernet_arp_interface_t *arp_int; - fib_link_t link; int is_static = args->is_static; u32 sw_if_index = args->sw_if_index; @@ -441,23 +523,43 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (make_new_arp_cache_entry) { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = a->ip4, + } + , + }; + u32 fib_index; + pool_get (am->ip4_entry_pool, e); if (NULL == arp_int->arp_entries) { arp_int->arp_entries = hash_create (0, sizeof (u32)); - if (mpls_sw_interface_is_enabled (sw_if_index)) - arp_int->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE; } hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool); e->sw_if_index = sw_if_index; e->ip4_address = a->ip4; - FOR_EACH_FIB_LINK (link) - { - e->adj_index[link] = ADJ_INDEX_INVALID; - } + clib_memcpy (e->ethernet_address, + a->ethernet, sizeof (e->ethernet_address)); + + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); +
e->fib_entry_index = + fib_table_entry_update_one_path (fib_index, + &pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx.fp_addr, + e->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } else { @@ -468,18 +570,19 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (0 == memcmp (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address))) return -1; + + /* Update time stamp and ethernet address. */ + clib_memcpy (e->ethernet_address, a->ethernet, + sizeof (e->ethernet_address)); } - /* Update time stamp and ethernet address. */ - clib_memcpy (e->ethernet_address, a->ethernet, - sizeof (e->ethernet_address)); e->cpu_time_last_updated = clib_cpu_time_now (); if (is_static) e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; else e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; - arp_mk_complete (arp_int, e, ARP_ETHER_TYPE_BOTH); + adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e); /* Customer(s) waiting for this address to be resolved? 
*/ p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); @@ -1334,7 +1437,6 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, args.sw_if_index = sw_if_index; args.flags = ETHERNET_ARP_ARGS_REMOVE; - args.ether_type = ARP_ETHER_TYPE_IP4; clib_memcpy (&args.a, a, sizeof (*a)); vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, @@ -1350,15 +1452,13 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, */ static int vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, - arp_ether_type_t et, void *a_arg) + u32 sw_if_index, void *a_arg) { ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; vnet_arp_set_ip4_over_ethernet_rpc_args_t args; args.sw_if_index = sw_if_index; args.flags = ETHERNET_ARP_ARGS_FLUSH; - args.ether_type = et; clib_memcpy (&args.a, a, sizeof (*a)); vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, @@ -1372,19 +1472,16 @@ vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm, * For static entries this will re-source the adjacencies. * * @param sw_if_index The interface on which the ARP entires are acted - * @param et The ether type of those ARP entries. 
*/ static int vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, - arp_ether_type_t et, void *a_arg) + u32 sw_if_index, void *a_arg) { ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; vnet_arp_set_ip4_over_ethernet_rpc_args_t args; args.sw_if_index = sw_if_index; args.flags = ETHERNET_ARP_ARGS_POPULATE; - args.ether_type = et; clib_memcpy (&args.a, a, sizeof (*a)); vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, @@ -1423,22 +1520,18 @@ arp_add_del_interface_address (ip4_main_t * im, eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; - hash_foreach_pair (pair, eai->arp_entries, ( - { - e = - pool_elt_at_index - (am->ip4_entry_pool, - pair->value[0]); - if - (ip4_destination_matches_route - (im, &e->ip4_address, - address, address_length)) - { - vec_add1 (to_delete, - e - - am->ip4_entry_pool);} - } - )); + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + if (ip4_destination_matches_route (im, &e->ip4_address, + address, address_length)) + { + vec_add1 (to_delete, e - am->ip4_entry_pool); + } + })); + /* *INDENT-ON* */ for (i = 0; i < vec_len (to_delete); i++) { @@ -1449,62 +1542,13 @@ arp_add_del_interface_address (ip4_main_t * im, delme.ip4.as_u32 = e->ip4_address.as_u32; vnet_arp_flush_ip4_over_ethernet (vnet_get_main (), - e->sw_if_index, - ARP_ETHER_TYPE_BOTH, &delme); + e->sw_if_index, &delme); } vec_free (to_delete); } } -static void -ethernet_arp_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable) -{ - ethernet_arp_main_t *am = &ethernet_arp_main; - ethernet_arp_ip4_entry_t *e; - ethernet_arp_interface_t *eai; - u32 i, *to_update = 0; - hash_pair_t *pair; - - if (vec_len (am->ethernet_arp_by_sw_if_index) < sw_if_index) - return; - - eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; - - if (is_enable) - eai->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE; - else - eai->flags &= ~ETHERNET_ARP_INTERFACE_MPLS_ENABLE; - -
hash_foreach_pair (pair, eai->arp_entries, ( - { - vec_add1 (to_update, - pair->value[0]); - } - )); - - for (i = 0; i < vec_len (to_update); i++) - { - ethernet_arp_ip4_over_ethernet_address_t updateme; - e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]); - - clib_memcpy (&updateme.ethernet, e->ethernet_address, 6); - updateme.ip4.as_u32 = e->ip4_address.as_u32; - - if (is_enable) - { - vnet_arp_populate_ip4_over_ethernet (vnet_get_main (), - e->sw_if_index, - ARP_ETHER_TYPE_MPLS, - &updateme); - } - else - continue; - - } - vec_free (to_update); -} - static clib_error_t * ethernet_arp_init (vlib_main_t * vm) { @@ -1550,92 +1594,21 @@ ethernet_arp_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); - vec_add1 (mpls_main.mpls_interface_state_change_callbacks, - ethernet_arp_sw_interface_mpls_state_change); - return 0; } VLIB_INIT_FUNCTION (ethernet_arp_init); -static void -arp_mk_incomplete (ethernet_arp_interface_t * eai, - ethernet_arp_ip4_entry_t * e, arp_ether_type_t et) -{ - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr = { - .ip4 = e->ip4_address, - }, - }; - u32 fib_index; - - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - - if ((ARP_ETHER_TYPE_IP4 & et) && - (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4])) - { - /* - * revert the adj this ARP entry sourced to incomplete - */ - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], NULL); - - /* - * remove the FIB erntry the ARP entry sourced - */ - fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_ADJ); - - /* - * Unlock the adj now that the ARP entry is no longer a source - */ - adj_unlock (e->adj_index[FIB_LINK_IP4]); - e->adj_index[FIB_LINK_IP4] = ADJ_INDEX_INVALID; - } - if ((ARP_ETHER_TYPE_MPLS & et) && - (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS])) - { - /* - * revert the adj this ARP entry sourced to incomplete - */ - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], NULL); 
- - /* - * Unlock the adj now that the ARP entry is no longer a source - */ - adj_unlock (e->adj_index[FIB_LINK_MPLS]); - e->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; - } -} - static void arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) { ethernet_arp_main_t *am = &ethernet_arp_main; + fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ); hash_unset (eai->arp_entries, e->ip4_address.as_u32); pool_put (am->ip4_entry_pool, e); } -static ethernet_arp_ip4_entry_t * -arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr) -{ - ethernet_arp_main_t *am = &ethernet_arp_main; - ethernet_arp_ip4_entry_t *e = NULL; - uword *p; - - if (NULL != eai->arp_entries) - { - p = hash_get (eai->arp_entries, addr->as_u32); - if (!p) - return (NULL); - - e = pool_elt_at_index (am->ip4_entry_pool, p[0]); - } - - return (e); -} - static inline int vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, vnet_arp_set_ip4_over_ethernet_rpc_args_t @@ -1651,7 +1624,8 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, if (NULL != e) { - arp_mk_incomplete (eai, e, ARP_ETHER_TYPE_BOTH); + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_incomplete_walk, e); arp_entry_free (eai, e); } @@ -1673,7 +1647,8 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, if (NULL != e) { - arp_mk_incomplete (eai, e, args->ether_type); + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_incomplete_walk, e); /* * The difference between flush and unset, is that an unset @@ -1682,8 +1657,7 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, * does in response to interface events. unset is only done * by the control plane.
*/ - if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) && - (args->ether_type & ARP_ETHER_TYPE_IP4)) + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) { arp_entry_free (eai, e); } @@ -1706,7 +1680,8 @@ vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm, if (NULL != e) { - arp_mk_complete (eai, e, args->ether_type); + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); } return (0); } @@ -1743,9 +1718,8 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, pool_foreach (e, am->ip4_entry_pool, ({ if (e->sw_if_index == sw_if_index) - { - vec_add1 (to_delete, e - am->ip4_entry_pool); - } + vec_add1 (to_delete, + e - am->ip4_entry_pool); })); /* *INDENT-ON* */ @@ -1759,25 +1733,21 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) { - vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, - ARP_ETHER_TYPE_BOTH, &delme); + vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme); } else { - vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, - ARP_ETHER_TYPE_BOTH, &delme); + vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme); } } vec_free (to_delete); - return 0; } VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down); - static void increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) { @@ -1811,7 +1781,6 @@ vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, args.sw_if_index = sw_if_index; args.is_static = is_static; args.flags = 0; - args.ether_type = ARP_ETHER_TYPE_IP4; clib_memcpy (&args.a, a, sizeof (*a)); vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, @@ -1990,7 +1959,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, return 0; } - +/* *INDENT-OFF* */ /*? * Add or delete IPv4 ARP cache entries. 
* @@ -2019,19 +1988,18 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = { .path = "set ip arp", .short_help = - "set ip arp [del] [static] [count ] [fib-id ] [proxy - ]", + "set ip arp [del] [static] [count ] [fib-id ] [proxy - ]", .function = ip_arp_add_del_command_fn, }; /* *INDENT-ON* */ static clib_error_t * set_int_proxy_arp_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * + input, vlib_cli_command_t * cmd) { vnet_main_t *vnm = vnet_get_main (); u32 sw_if_index; @@ -2066,7 +2034,7 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm, return 0; } - +/* *INDENT-OFF* */ /*? * Enable proxy-arp on an interface. The vpp stack will answer ARP * requests for the indicated address range. Multiple proxy-arp @@ -2086,11 +2054,10 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm, * To disable proxy arp on an individual interface: * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { .path = "set interface proxy-arp", .short_help = - "set interface proxy-arp [enable|disable]", + "set interface proxy-arp [enable|disable]", .function = set_int_proxy_arp_command_fn, }; /* *INDENT-ON* */ @@ -2174,8 +2141,8 @@ arp_term_l2bd (vlib_main_t * vm, error0 = ETHERNET_ARP_ERROR_replies_sent; error0 = (arp0->l2_type != - clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ? - ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0); + clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) + ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0); error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ? @@ -2269,8 +2236,9 @@ arp_term_l2bd (vlib_main_t * vm, for ARP requests from other hosts. 
If output to VXLAN tunnel is required, however, can just clear the SHG in packet as follows: vnet_buffer(p0)->l2.shg = 0; */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); continue; check_ip6_nd: @@ -2283,9 +2251,9 @@ arp_term_l2bd (vlib_main_t * vm, (&iph0->src_address))) { sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - if (vnet_ip6_nd_term (vm, node, p0, eth0, iph0, sw_if_index0, - vnet_buffer (p0)->l2.bd_index, - vnet_buffer (p0)->l2.shg)) + if (vnet_ip6_nd_term + (vm, node, p0, eth0, iph0, sw_if_index0, + vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg)) goto output_response; } @@ -2294,10 +2262,12 @@ arp_term_l2bd (vlib_main_t * vm, u32 feature_bitmap0 = vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM; vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0; - next0 = feat_bitmap_get_next_node_index (arp_term_next_node_index, - feature_bitmap0); - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + next0 = + feat_bitmap_get_next_node_index (arp_term_next_node_index, + feature_bitmap0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); continue; } @@ -2311,8 +2281,9 @@ arp_term_l2bd (vlib_main_t * vm, next0 = ARP_TERM_NEXT_DROP; p0->error = node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -2342,7 +2313,8 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = { clib_error_t * arp_term_init (vlib_main_t * vm) -{ // Initialize the feature next-node indexes +{ + // Initialize the feature next-node indexes feat_bitmap_init_next_nodes (vm, arp_term_l2bd_node.index, 
L2INPUT_N_FEAT, @@ -2358,21 +2330,8 @@ change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e) { if (e->sw_if_index == sw_if_index) { - - if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]) - { - // the update rewrite function takes the dst mac (which is not changing) - // the new source mac will be retrieved from the interface - // when the full rewrite is constructed. - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], - e->ethernet_address); - } - if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]) - { - adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], - e->ethernet_address); - } - + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); } } @@ -2384,9 +2343,9 @@ ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index) /* *INDENT-OFF* */ pool_foreach (e, am->ip4_entry_pool, - ({ - change_arp_mac (sw_if_index, e); - })); + ({ + change_arp_mac (sw_if_index, e); + })); /* *INDENT-ON* */ } diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h index 973ed58ce72..34ddb82bb0a 100644 --- a/vnet/vnet/ethernet/ethernet.h +++ b/vnet/vnet/ethernet/ethernet.h @@ -562,6 +562,13 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, void ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index); +void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); + +void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); +u8 *ethernet_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address); + extern vlib_node_registration_t ethernet_input_node; #endif /* included_ethernet_h */ diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c index 43f1cd4af44..45d215d324f 100644 --- a/vnet/vnet/ethernet/interface.c +++ b/vnet/vnet/ethernet/interface.c @@ -42,9 +42,7 @@ #include #include #include -#include -#include -#include +#include /** * @file @@ -53,32 +51,24 @@ * This file contains code to manage loopback interfaces. 
*/ -int -vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) -{ - // FIXME - use flags on the HW itf - vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - return (!(hw->hw_class_index == ethernet_hw_interface_class.index || - hw->hw_class_index == af_packet_device_class.index || - hw->hw_class_index == lisp_gpe_hw_class.index || - hw->hw_class_index == srp_hw_interface_class.index)); -} - -static uword -ethernet_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void *dst_address, - void *rewrite, uword max_rewrite_bytes) +/** + * @brief build a rewrite string to use for sending packets of type 'link_type' + * to 'dst_address' + */ +u8 * +ethernet_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) { vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index); vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); ethernet_main_t *em = &ethernet_main; ethernet_interface_t *ei; - ethernet_header_t *h = rewrite; + ethernet_header_t *h; ethernet_type_t type; uword n_bytes = sizeof (h[0]); + u8 *rewrite = NULL; if (sub_sw != sup_sw) { @@ -100,22 +90,20 @@ ethernet_set_rewrite (vnet_main_t * vnm, } } - if (n_bytes > max_rewrite_bytes) - return 0; - - switch (l3_type) + switch (link_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break +#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _(IP4, IP4); _(IP6, IP6); - _(MPLS_UNICAST, MPLS_UNICAST); - _(MPLS_MULTICAST, MPLS_MULTICAST); + _(MPLS, MPLS_UNICAST); _(ARP, ARP); #undef _ default: - return 0; + return NULL; } + vec_validate (rewrite, n_bytes - 1); + h = (ethernet_header_t *) rewrite; ei = pool_elt_at_index (em->interfaces, hw->hw_instance); clib_memcpy (h->src_address, ei->address, sizeof (h->src_address)); if (dst_address) @@ -156,7 +144,28 @@ ethernet_set_rewrite
(vnet_main_t * vnm, h->type = clib_host_to_net_u16 (type); } - return n_bytes; + return (rewrite); +} + +void +ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) +{ + ip_adjacency_t *adj; + + adj = adj_get (ai); + + if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto) + { + arp_update_adjacency (vnm, sw_if_index, ai); + } + else if (FIB_PROTOCOL_IP6 == adj->ia_nh_proto) + { + ip6_ethernet_update_adjacency (vnm, sw_if_index, ai); + } + else + { + ASSERT (0); + } } /* *INDENT-OFF* */ @@ -166,7 +175,8 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = { .format_header = format_ethernet_header_with_length, .unformat_hw_address = unformat_ethernet_address, .unformat_header = unformat_ethernet_header, - .set_rewrite = ethernet_set_rewrite, + .build_rewrite = ethernet_build_rewrite, + .update_adjacency = ethernet_update_adjacency, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/fib/fib_entry.c b/vnet/vnet/fib/fib_entry.c index 5429da2983d..404f0f40da7 100644 --- a/vnet/vnet/fib/fib_entry.c +++ b/vnet/vnet/fib/fib_entry.c @@ -402,35 +402,21 @@ fib_entry_back_walk_notify (fib_node_t *node, fib_entry_get_index(fib_entry))); } - if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) - { - /* - * ADJ updates (complete<->incomplete) do not need to propagate to - * recursive entries. - * The only reason its needed as far back as here, is that the adj - * and the incomplete adj are a different DPO type, so the LBs need - * to re-stack. - */ - return (FIB_NODE_BACK_WALK_CONTINUE); - } - else - { - /* - * all other walk types can be reclassifed to a re-evaluate to - * all recursive dependents. - * By reclassifying we ensure that should any of these walk types meet - * they can be merged. - */ - ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; - - /* - * propagate the backwalk further if we haven't already reached the - * maximum depth. 
- */ - fib_walk_sync(FIB_NODE_TYPE_ENTRY, - fib_entry_get_index(fib_entry), - ctx); - } + /* + * all other walk types can be reclassifed to a re-evaluate to + * all recursive dependents. + * By reclassifying we ensure that should any of these walk types meet + * they can be merged. + */ + ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + + /* + * propagate the backwalk further if we haven't already reached the + * maximum depth. + */ + fib_walk_sync(FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry), + ctx); return (FIB_NODE_BACK_WALK_CONTINUE); } diff --git a/vnet/vnet/fib/fib_path.c b/vnet/vnet/fib/fib_path.c index bea17218547..ba42e6be545 100644 --- a/vnet/vnet/fib/fib_path.c +++ b/vnet/vnet/fib/fib_path.c @@ -757,6 +757,20 @@ fib_path_back_walk_notify (fib_node_t *node, fib_path_proto_to_chain_type(path->fp_nh_proto), &path->fp_dpo); } + if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) + { + /* + * ADJ updates (complete<->incomplete) do not need to propagate to + * recursive entries. + * The only reason its needed as far back as here, is that the adj + * and the incomplete adj are a different DPO type, so the LBs need + * to re-stack. + * If this walk was quashed in the fib_entry, then any non-fib_path + * children (like tunnels that collapse out the LB when they stack) + * would not see the update. 
+ */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } break; case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: /* diff --git a/vnet/vnet/fib/fib_test.c b/vnet/vnet/fib/fib_test.c index 7729209d6ab..1e459cf1a6c 100644 --- a/vnet/vnet/fib/fib_test.c +++ b/vnet/vnet/fib/fib_test.c @@ -222,6 +222,19 @@ fib_test_urpf_is_equal (fib_node_index_t fei, return (1); } +static u8* +fib_test_build_rewrite (u8 *eth_addr) +{ + u8* rewrite = NULL; + + vec_validate(rewrite, 13); + + memcpy(rewrite, eth_addr, 6); + memcpy(rewrite+6, eth_addr, 6); + + return (rewrite); +} + static void fib_test_v4 (void) { @@ -523,6 +536,7 @@ fib_test_v4 (void) u8 eth_addr[] = { 0xde, 0xde, 0xde, 0xba, 0xba, 0xba, }; + ip46_address_t nh_12_12_12_12 = { .ip4.as_u32 = clib_host_to_net_u32(0x0c0c0c0c), }; @@ -561,7 +575,8 @@ fib_test_v4 (void) &adj->sub_type.nbr.next_hop)), "adj nbr next-hop ok"); - adj_nbr_update_rewrite(ai_01, eth_addr); + adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), "adj is complete"); FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr, @@ -589,7 +604,8 @@ fib_test_v4 (void) FIB_TEST((0 == ip46_address_cmp(&nh_12_12_12_12, &adj->sub_type.nbr.next_hop)), "adj nbr next-hop ok"); - adj_nbr_update_rewrite(ai_12_12_12_12, eth_addr); + adj_nbr_update_rewrite(ai_12_12_12_12, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), "adj is complete"); @@ -636,7 +652,8 @@ fib_test_v4 (void) &adj->sub_type.nbr.next_hop)), "adj nbr next-hop ok"); - adj_nbr_update_rewrite(ai_02, eth_addr); + adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), "adj is complete"); FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr, @@ -3113,7 +3130,8 @@ fib_test_v6 (void) &adj->sub_type.nbr.next_hop)), "adj nbr next-hop 
ok"); - adj_nbr_update_rewrite(ai_01, eth_addr); + adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), "adj is complete"); FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr, @@ -3150,7 +3168,8 @@ fib_test_v6 (void) &adj->sub_type.nbr.next_hop)), "adj nbr next-hop ok"); - adj_nbr_update_rewrite(ai_02, eth_addr); + adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), "adj is complete"); FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr, diff --git a/vnet/vnet/fib/fib_types.h b/vnet/vnet/fib/fib_types.h index 4d656565de4..2f23527ec1b 100644 --- a/vnet/vnet/fib/fib_types.h +++ b/vnet/vnet/fib/fib_types.h @@ -73,21 +73,17 @@ typedef enum fib_protocol_t_ { * Link Type. This maps directly into the ethertype. */ typedef enum fib_link_t_ { -#if CLIB_DEBUG > 0 - FIB_LINK_IP4 = 1, -#else - FIB_LINK_IP4 = 0, -#endif - FIB_LINK_IP6, - FIB_LINK_ETHERNET, - FIB_LINK_MPLS, + FIB_LINK_IP4 = VNET_LINK_IP4, + FIB_LINK_IP6 = VNET_LINK_IP6, + FIB_LINK_MPLS = VNET_LINK_MPLS, + FIB_LINK_ETHERNET = VNET_LINK_ETHERNET, } __attribute__ ((packed)) fib_link_t; /** * Definition outside of enum so it does not need to be included in non-defaulted * switch statements */ -#define FIB_LINK_NUM (FIB_LINK_MPLS+1) +#define FIB_LINK_NUM (FIB_LINK_ETHERNET+1) #define FIB_LINKS { \ [FIB_LINK_ETHERNET] = "ethernet", \ diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c index aa6fca0f27d..a4b3f9fc228 100644 --- a/vnet/vnet/gre/gre.c +++ b/vnet/vnet/gre/gre.c @@ -17,7 +17,7 @@ #include #include -#include +#include gre_main_t gre_main; @@ -162,133 +162,95 @@ unformat_gre_header (unformat_input_t * input, va_list * args) return 1; } -static uword gre_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword 
max_rewrite_bytes) +static int +gre_proto_from_vnet_link (vnet_link_t link) { - /* - * Conundrum: packets from tun/tap destined for the tunnel - * actually have this rewrite applied. Transit packets do not. - * To make the two cases equivalent, don't generate a - * rewrite here, build the entire header in the fast path. - */ - return 0; - -#ifdef THINGS_WORKED_AS_ONE_MIGHT_LIKE - ip4_and_gre_header_t * h = rewrite; - gre_protocol_t protocol; - - if (max_rewrite_bytes < sizeof (h[0])) - return 0; - - switch (l3_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = GRE_PROTOCOL_##b; break - _ (IP4, ip4); - _ (IP6, ip6); -#undef _ - default: - return 0; - } - - memset (h, 0, sizeof (*h)); - h->ip4.ip_version_and_header_length = 0x45; - h->ip4.ttl = 64; - h->ip4.protocol = IP_PROTOCOL_GRE; - h->gre.protocol = clib_host_to_net_u16 (protocol); - - return sizeof (h[0]); -#endif + switch (link) + { + case VNET_LINK_IP4: + return (GRE_PROTOCOL_ip4); + case VNET_LINK_IP6: + return (GRE_PROTOCOL_ip6); + case VNET_LINK_MPLS: + return (GRE_PROTOCOL_mpls_unicast); + case VNET_LINK_ETHERNET: + return (GRE_PROTOCOL_teb); + case VNET_LINK_ARP: + return (GRE_PROTOCOL_arp); + } + ASSERT(0); + return (GRE_PROTOCOL_ip4); } -static uword -gre_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +static u8* +gre_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) { gre_main_t * gm = &gre_main; - u32 next_index; - u32 * from, * to_next, n_left_from, n_left_to_next; - vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; - gre_tunnel_t *t = pool_elt_at_index (gm->tunnels, rd->dev_instance); - - /* Vector of buffer / pkt indices we're supposed to process */ - from = vlib_frame_vector_args (frame); - - /* Number of buffers / pkts */ - n_left_from = frame->n_vectors; - - /* Speculatively send the first buffer to the last disposition we used */ - next_index = node->cached_next_index; 
- - while (n_left_from > 0) - { - /* set up to enqueue to our disposition with index = next_index */ - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + ip4_and_gre_header_t * h; + u8* rewrite = NULL; + gre_tunnel_t *t; + u32 ti; - /* - * FIXME DUAL LOOP - */ + ti = gm->tunnel_index_by_sw_if_index[sw_if_index]; - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, adj_index0, next0; - const ip_adjacency_t * adj0; - const dpo_id_t *dpo0; - ip4_header_t * ip0; - vlib_buffer_t * b0; + if (~0 == ti) + /* not one of ours */ + return (0); - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; + t = pool_elt_at_index(gm->tunnels, ti); - b0 = vlib_get_buffer(vm, bi0); - ip0 = vlib_buffer_get_current (b0); + vec_validate(rewrite, sizeof(*h)-1); + h = (ip4_and_gre_header_t*)rewrite; + h->gre.protocol = clib_host_to_net_u16(gre_proto_from_vnet_link(link_type)); - /* Fixup the checksum and len fields in the GRE tunnel encap - * that was applied at the midchain node */ - ip0->length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip0->checksum = ip4_header_checksum (ip0); + h->ip4.ip_version_and_header_length = 0x45; + h->ip4.ttl = 254; + h->ip4.protocol = IP_PROTOCOL_GRE; + /* fixup ip4 header length and checksum after-the-fact */ + h->ip4.src_address.as_u32 = t->tunnel_src.as_u32; + h->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32; + h->ip4.checksum = ip4_header_checksum (&h->ip4); - /* Follow the DPO on which the midchain is stacked */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - adj0 = adj_get(adj_index0); - dpo0 = &adj0->sub_type.midchain.next_dpo; - next0 = dpo0->dpoi_next_node; - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + return (rewrite); +} - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - gre_tx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_id = t - gm->tunnels; - tr->length = ip0->length; - 
tr->src.as_u32 = ip0->src_address.as_u32; - tr->dst.as_u32 = ip0->dst_address.as_u32; - } +void +gre_fixup (vlib_main_t *vm, + ip_adjacency_t *adj, + vlib_buffer_t *b0) +{ + ip4_header_t * ip0; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } + ip0 = vlib_buffer_get_current (b0); - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } + /* Fixup the checksum and len fields in the GRE tunnel encap + * that was applied at the midchain node */ + ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + ip0->checksum = ip4_header_checksum (ip0); +} - vlib_node_increment_counter (vm, gre_input_node.index, - GRE_ERROR_PKTS_ENCAP, frame->n_vectors); +void +gre_update_adj (vnet_main_t * vnm, + u32 sw_if_index, + adj_index_t ai) +{ + adj_nbr_midchain_update_rewrite (ai, gre_fixup, + ADJ_MIDCHAIN_FLAG_NONE, + gre_build_rewrite(vnm, sw_if_index, + adj_get_link_type(ai), + NULL)); - return frame->n_vectors; + gre_tunnel_stack(ai); } +/** + * @brief TX function. Only called L2. 
L3 traffic uses the adj-midchains + */ static uword -gre_l2_interface_tx (vlib_main_t * vm, +gre_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { @@ -330,7 +292,7 @@ gre_l2_interface_tx (vlib_main_t * vm, b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->adj_index[FIB_LINK_ETHERNET]; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -356,38 +318,6 @@ gre_l2_interface_tx (vlib_main_t * vm, return frame->n_vectors; } -static clib_error_t * -gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - gre_main_t * gm = &gre_main; - vnet_hw_interface_t * hi; - gre_tunnel_t *t; - u32 ti; - - hi = vnet_get_hw_interface (vnm, hw_if_index); - - if (NULL == gm->tunnel_index_by_sw_if_index || - hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index)) - return (NULL); - - ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index]; - - if (~0 == ti) - /* not one of ours */ - return (NULL); - - t = pool_elt_at_index(gm->tunnels, ti); - - if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); - else - vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); - - gre_tunnel_stack(t); - - return /* no error */ 0; -} - static u8 * format_gre_tunnel_name (u8 * s, va_list * args) { u32 dev_instance = va_arg (*args, u32); @@ -403,15 +333,6 @@ static u8 * format_gre_device (u8 * s, va_list * args) return s; } -static u8 * format_gre_l2_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - CLIB_UNUSED (int verbose) = va_arg (*args, int); - - s = format (s, "GRE L2-tunnel: id %d\n", dev_instance); - return s; -} - VNET_DEVICE_CLASS (gre_device_class) = { .name = "GRE tunnel device", .format_device_name = format_gre_tunnel_name, @@ -427,27 +348,13 @@ VNET_DEVICE_CLASS (gre_device_class) = { VLIB_DEVICE_TX_FUNCTION_MULTIARCH 
(gre_device_class, gre_interface_tx) -VNET_DEVICE_CLASS (gre_l2_device_class) = { - .name = "GRE L2 tunnel device", - .format_device_name = format_gre_tunnel_name, - .format_device = format_gre_l2_device, - .format_tx_trace = format_gre_tx_trace, - .tx_function = gre_l2_interface_tx, - .admin_up_down_function = gre_interface_admin_up_down, -#ifdef SOON - .clear counter = 0; -#endif -}; - -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_l2_device_class, - gre_l2_interface_tx) - - VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { .name = "GRE", .format_header = format_gre_header_with_length, .unformat_header = unformat_gre_header, - .set_rewrite = gre_set_rewrite, + .build_rewrite = gre_build_rewrite, + .update_adjacency = gre_update_adj, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; static void add_protocol (gre_main_t * gm, diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h index d1a6f319bac..a0ee9ad263b 100644 --- a/vnet/vnet/gre/gre.h +++ b/vnet/vnet/gre/gre.h @@ -86,14 +86,14 @@ typedef struct { u32 sibling_index; /** - * The index of the midchain adjacency created for this tunnel + * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain */ - adj_index_t adj_index[FIB_LINK_NUM]; + u32 l2_tx_arc; /** - * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + * an L2 tunnel always requires an L2 midchain. cache here for DP.
*/ - u32 l2_tx_arc; + adj_index_t l2_adj_index; } gre_tunnel_t; typedef struct { @@ -142,7 +142,14 @@ gre_register_input_type (vlib_main_t * vm, gre_protocol_t protocol, u32 node_index); -extern void gre_tunnel_stack (gre_tunnel_t *gt); +extern clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags); + +extern void gre_tunnel_stack (adj_index_t ai); +extern void gre_update_adj (vnet_main_t * vnm, + u32 sw_if_index, + adj_index_t ai); format_function_t format_gre_protocol; format_function_t format_gre_header; diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c index 0550c0bdab1..397a0427267 100644 --- a/vnet/vnet/gre/interface.c +++ b/vnet/vnet/gre/interface.c @@ -21,6 +21,7 @@ #include #include #include +#include #include static inline u64 @@ -36,7 +37,6 @@ static u8 * format_gre_tunnel (u8 * s, va_list * args) { gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *); - int detail = va_arg (*args, int); gre_main_t * gm = &gre_main; s = format (s, @@ -46,14 +46,6 @@ format_gre_tunnel (u8 * s, va_list * args) format_ip4_address, &t->tunnel_dst, (t->teb ? 
"teb" : "ip"), t->outer_fib_index); - if (detail) - { - s = format (s, "\n fib-entry:%d adj-ip4:%d adj-ip6:%d adj-mpls:%d", - t->fib_entry_index, - t->adj_index[FIB_LINK_IP4], - t->adj_index[FIB_LINK_IP6], - t->adj_index[FIB_LINK_MPLS]); - } return s; } @@ -113,32 +105,68 @@ gre_tunnel_from_fib_node (fib_node_t *node) * 'stack' (resolve the recursion for) the tunnel's midchain adjacency */ void -gre_tunnel_stack (gre_tunnel_t *gt) +gre_tunnel_stack (adj_index_t ai) { - fib_link_t linkt; + gre_main_t * gm = &gre_main; + ip_adjacency_t *adj; + gre_tunnel_t *gt; + u32 sw_if_index; + + adj = adj_get(ai); + sw_if_index = adj->rewrite_header.sw_if_index; + + if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) || + (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) + return; + + gt = pool_elt_at_index(gm->tunnels, + gm->tunnel_index_by_sw_if_index[sw_if_index]); /* * find the adjacency that is contributed by the FIB entry * that this tunnel resovles via, and use it as the next adj * in the midchain */ - FOR_EACH_FIB_LINK(linkt) + if (vnet_hw_interface_get_flags(vnet_get_main(), + gt->hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) { - if (ADJ_INDEX_INVALID != gt->adj_index[linkt]) - { - if (vnet_hw_interface_get_flags(vnet_get_main(), - gt->hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) - { - adj_nbr_midchain_stack( - gt->adj_index[linkt], - fib_entry_contribute_ip_forwarding(gt->fib_entry_index)); - } - else - { - adj_nbr_midchain_unstack(gt->adj_index[linkt]); - } - } + adj_nbr_midchain_stack( + ai, + fib_entry_contribute_ip_forwarding(gt->fib_entry_index)); + } + else + { + adj_nbr_midchain_unstack(ai); + } +} + +/** + * @brief Call back when restacking all adjacencies on a GRE interface + */ +static adj_walk_rc_t +gre_adj_walk_cb (adj_index_t ai, + void *ctx) +{ + gre_tunnel_stack(ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +static void +gre_tunnel_restack (gre_tunnel_t *gt) +{ + fib_protocol_t proto; + + /* + * walk all the adjacencies on the GRE
interface and restack them + */ + FOR_EACH_FIB_IP_PROTOCOL(proto) + { + adj_nbr_walk(gt->sw_if_index, + proto, + gre_adj_walk_cb, + NULL); } } @@ -147,9 +175,9 @@ gre_tunnel_stack (gre_tunnel_t *gt) */ static fib_node_back_walk_rc_t gre_tunnel_back_walk (fib_node_t *node, - fib_node_back_walk_ctx_t *ctx) + fib_node_back_walk_ctx_t *ctx) { - gre_tunnel_stack(gre_tunnel_from_fib_node(node)); + gre_tunnel_restack(gre_tunnel_from_fib_node(node)); return (FIB_NODE_BACK_WALK_CONTINUE); } @@ -192,63 +220,6 @@ const static fib_node_vft_t gre_vft = { .fnv_back_walk = gre_tunnel_back_walk, }; -static int -gre_proto_from_fib_link (fib_link_t link) -{ - switch (link) - { - case FIB_LINK_IP4: - return (GRE_PROTOCOL_ip4); - case FIB_LINK_IP6: - return (GRE_PROTOCOL_ip6); - case FIB_LINK_MPLS: - return (GRE_PROTOCOL_mpls_unicast); - case FIB_LINK_ETHERNET: - return (GRE_PROTOCOL_teb); - } - ASSERT(0); - return (GRE_PROTOCOL_ip4); -} - -static u8 * -gre_rewrite (gre_tunnel_t * t, - fib_link_t link) -{ - ip4_and_gre_header_t * h0; - u8 * rewrite_data = 0; - - vec_validate_init_empty (rewrite_data, sizeof (*h0) - 1, 0); - - h0 = (ip4_and_gre_header_t *) rewrite_data; - - h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link)); - - h0->ip4.ip_version_and_header_length = 0x45; - h0->ip4.ttl = 254; - h0->ip4.protocol = IP_PROTOCOL_GRE; - /* $$$ fixup ip4 header length and checksum after-the-fact */ - h0->ip4.src_address.as_u32 = t->tunnel_src.as_u32; - h0->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32; - h0->ip4.checksum = ip4_header_checksum (&h0->ip4); - - return (rewrite_data); -} - -static void -gre_fixup (vlib_main_t *vm, - ip_adjacency_t *adj, - vlib_buffer_t *b0) -{ - ip4_header_t * ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Fixup the checksum and len fields in the GRE tunnel encap - * that was applied at the midchain node */ - ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip0->checksum = ip4_header_checksum (ip0); -} - 
static int vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, u32 * sw_if_indexp) @@ -262,8 +233,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, u32 outer_fib_index; u8 address[6]; clib_error_t *error; - fib_link_t linkt; - u8 *rewrite; outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id); @@ -278,10 +247,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); memset (t, 0, sizeof (*t)); fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL); - FOR_EACH_FIB_LINK(linkt) - { - t->adj_index[linkt] = ADJ_INDEX_INVALID; - } if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) { vnet_interface_main_t * im = &vnm->interface_main; @@ -321,10 +286,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, address[3] = 0xd0; address[4] = t - gm->tunnels; - error = ethernet_register_interface - (vnm, - gre_l2_device_class.index, t - gm->tunnels, address, &hw_if_index, - 0); + error = ethernet_register_interface(vnm, + gre_device_class.index, + t - gm->tunnels, address, + &hw_if_index, + 0); if (error) { @@ -337,10 +303,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, hi->tx_node_index, "adj-l2-midchain"); } else { - hw_if_index = vnet_register_interface - (vnm, gre_device_class.index, t - gm->tunnels, - gre_hw_interface_class.index, - t - gm->tunnels); + hw_if_index = vnet_register_interface(vnm, + gre_device_class.index, + t - gm->tunnels, + gre_hw_interface_class.index, + t - gm->tunnels); } hi = vnet_get_hw_interface (vnm, hw_if_index); sw_if_index = hi->sw_if_index; @@ -395,48 +362,18 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, FIB_NODE_TYPE_GRE_TUNNEL, t - gm->tunnels); - /* - * create and update the midchain adj this tunnel sources. - * We could be smarter here and trigger this on an interface proto enable, - * like we do for MPLS. 
- */ + clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); + clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); + if (t->teb) { - t->adj_index[FIB_LINK_ETHERNET] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, - FIB_LINK_ETHERNET, - &zero_addr, - sw_if_index); - - rewrite = gre_rewrite(t, FIB_LINK_ETHERNET); - adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_ETHERNET], - gre_fixup, - ADJ_MIDCHAIN_FLAG_NO_COUNT, - rewrite); - vec_free(rewrite); - } - else - { - FOR_EACH_FIB_IP_LINK (linkt) - { - t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, - linkt, - &zero_addr, - sw_if_index); - - rewrite = gre_rewrite(t, linkt); - adj_nbr_midchain_update_rewrite(t->adj_index[linkt], - gre_fixup, - ADJ_MIDCHAIN_FLAG_NONE, - rewrite); - vec_free(rewrite); - } - } - - t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; + t->l2_adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_ETHERNET, + &zero_addr, + sw_if_index); - clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); - clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); - gre_tunnel_stack(t); + gre_update_adj(vnm, t->sw_if_index, t->l2_adj_index); + } if (sw_if_indexp) *sw_if_indexp = sw_if_index; @@ -451,7 +388,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a, gre_main_t * gm = &gre_main; vnet_main_t * vnm = gm->vnet_main; gre_tunnel_t * t; - fib_link_t linkt; u32 sw_if_index; t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id); @@ -472,11 +408,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a, fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR); - FOR_EACH_FIB_LINK(linkt) - { - adj_unlock(t->adj_index[linkt]); - } - gre_tunnel_db_remove(t); fib_node_deinit(&t->node); pool_put (gm->tunnels, t); @@ -497,43 +428,36 @@ vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a, return (vnet_gre_tunnel_delete(a, sw_if_indexp)); } -static void -gre_sw_interface_mpls_state_change (u32 sw_if_index, - u32 is_enable) +clib_error_t * 
+gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { - gre_main_t *gm = &gre_main; + gre_main_t * gm = &gre_main; + vnet_hw_interface_t * hi; gre_tunnel_t *t; - u8 *rewrite; + u32 ti; - if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) || - (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) - return; + hi = vnet_get_hw_interface (vnm, hw_if_index); - t = pool_elt_at_index(gm->tunnels, - gm->tunnel_index_by_sw_if_index[sw_if_index]); + if (NULL == gm->tunnel_index_by_sw_if_index || + hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index)) + return (NULL); - if (is_enable) - { - t->adj_index[FIB_LINK_MPLS] = - adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, - FIB_LINK_MPLS, - &zero_addr, - sw_if_index); - - rewrite = gre_rewrite(t, FIB_LINK_MPLS); - adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS], - gre_fixup, - ADJ_MIDCHAIN_FLAG_NONE, - rewrite); - vec_free(rewrite); - } + ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index]; + + if (~0 == ti) + /* not one of ours */ + return (NULL); + + t = pool_elt_at_index(gm->tunnels, ti); + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); else - { - adj_unlock(t->adj_index[FIB_LINK_MPLS]); - t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; - } + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); - gre_tunnel_stack(t); + gre_tunnel_restack(t); + + return /* no error */ 0; } static clib_error_t * @@ -637,14 +561,14 @@ show_gre_tunnel_command_fn (vlib_main_t * vm, { pool_foreach (t, gm->tunnels, ({ - vlib_cli_output (vm, "%U", format_gre_tunnel, t, 0); + vlib_cli_output (vm, "%U", format_gre_tunnel, t); })); } else { t = pool_elt_at_index(gm->tunnels, ti); - vlib_cli_output (vm, "%U", format_gre_tunnel, t, 1); + vlib_cli_output (vm, "%U", format_gre_tunnel, t); } return 0; @@ -658,9 +582,6 @@ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = { /* force inclusion from application's main.c */ 
clib_error_t *gre_interface_init (vlib_main_t *vm) { - vec_add1(mpls_main.mpls_interface_state_change_callbacks, - gre_sw_interface_mpls_state_change); - fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft); return 0; diff --git a/vnet/vnet/hdlc/hdlc.c b/vnet/vnet/hdlc/hdlc.c index 9997ddc5403..174085ac519 100644 --- a/vnet/vnet/hdlc/hdlc.c +++ b/vnet/vnet/hdlc/hdlc.c @@ -167,42 +167,41 @@ unformat_hdlc_header (unformat_input_t * input, va_list * args) return 1; } -static uword hdlc_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) +static u8* +hdlc_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) { - hdlc_header_t * h = rewrite; + hdlc_header_t * h; + u8* rewrite = NULL; hdlc_protocol_t protocol; - if (max_rewrite_bytes < sizeof (h[0])) - return 0; - - switch (l3_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = HDLC_PROTOCOL_##b; break + switch (link_type) { +#define _(a,b) case VNET_LINK_##a: protocol = HDLC_PROTOCOL_##b; break _ (IP4, ip4); _ (IP6, ip6); - _ (MPLS_UNICAST, mpls_unicast); - _ (MPLS_MULTICAST, mpls_multicast); + _ (MPLS, mpls_unicast); #undef _ default: - return 0; + return (NULL); } + vec_validate(rewrite, sizeof(*h)-1); + h = (hdlc_header_t *)rewrite; h->address = 0x0f; h->control = 0x00; h->protocol = clib_host_to_net_u16 (protocol); - return sizeof (h[0]); + return (rewrite); } VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = { .name = "HDLC", .format_header = format_hdlc_header_with_length, .unformat_header = unformat_hdlc_header, - .set_rewrite = hdlc_set_rewrite, + .build_rewrite = hdlc_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; static void add_protocol (hdlc_main_t * pm, diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c index 08db68324c6..941ab170b49 100644 --- a/vnet/vnet/interface.c +++ b/vnet/vnet/interface.c @@ -40,6 +40,7 @@ #include #include 
#include +#include #define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0) #define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1) @@ -1044,6 +1045,16 @@ vnet_hw_interface_compare (vnet_main_t * vnm, return (word) h0->hw_instance - (word) h1->hw_instance; } +int +vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + + return (hc->flags & VNET_HW_INTERFACE_CLASS_FLAG_P2P); +} + clib_error_t * vnet_interface_init (vlib_main_t * vm) { @@ -1120,6 +1131,12 @@ vnet_interface_init (vlib_main_t * vm) { c->index = vec_len (im->hw_interface_classes); hash_set_mem (im->hw_interface_class_by_name, c->name, c->index); + + if (NULL == c->build_rewrite) + c->build_rewrite = default_build_rewrite; + if (NULL == c->update_adjacency) + c->update_adjacency = default_update_adjacency; + vec_add1 (im->hw_interface_classes, c[0]); c = c->next_class_registration; } @@ -1287,6 +1304,48 @@ vnet_hw_interface_change_mac_address (vnet_main_t * vnm, u32 hw_if_index, (vnm, hw_if_index, mac_address); } +vnet_l3_packet_type_t +vnet_link_to_l3_proto (vnet_link_t link) +{ + switch (link) + { + case VNET_LINK_IP4: + return (VNET_L3_PACKET_TYPE_IP4); + case VNET_LINK_IP6: + return (VNET_L3_PACKET_TYPE_IP6); + case VNET_LINK_MPLS: + return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + case VNET_LINK_ARP: + return (VNET_L3_PACKET_TYPE_ARP); + case VNET_LINK_ETHERNET: + ASSERT (0); + break; + } + ASSERT (0); + return (0); +} + +u8 * +default_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) +{ + return (NULL); +} + +void +default_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) +{ + u8 *rewrite; + + rewrite = vnet_build_rewrite_for_sw_interface (vnm, sw_if_index, + adj_get_link_type (ai), + NULL); + + adj_nbr_update_rewrite (ai, 
ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite); +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h index 245b86f5830..c5a79472bba 100644 --- a/vnet/vnet/interface.h +++ b/vnet/vnet/interface.h @@ -41,10 +41,12 @@ #define included_vnet_interface_h #include +#include struct vnet_main_t; struct vnet_hw_interface_t; struct vnet_sw_interface_t; +struct ip46_address_t; /* Interface up/down callback. */ typedef clib_error_t *(vnet_interface_function_t) @@ -196,6 +198,39 @@ __VA_ARGS__ vnet_device_class_t x { dev.tx_function = fn ## _multiarch_select(); } #endif +/** + * Link Type: A description of the protocol of packets on the link. + * On an ethernet link this maps directly into the ethertype. On a GRE tunnel + * it maps to the GRE-proto, etc for other link types. + */ +typedef enum vnet_link_t_ +{ +#if CLIB_DEBUG > 0 + VNET_LINK_IP4 = 1, +#else + VNET_LINK_IP4 = 0, +#endif + VNET_LINK_IP6, + VNET_LINK_MPLS, + VNET_LINK_ETHERNET, + VNET_LINK_ARP, +} __attribute__ ((packed)) vnet_link_t; + +/** + * @brief Convert a link to an Ethertype + */ +extern vnet_l3_packet_type_t vnet_link_to_l3_proto (vnet_link_t link); + +/** + * @brief Attributes assignable to a HW interface Class. + */ +typedef enum vnet_hw_interface_class_flags_t_ +{ + /** + * @brief a point 2 point interface + */ + VNET_HW_INTERFACE_CLASS_FLAG_P2P = (1 << 0), +} vnet_hw_interface_class_flags_t; /* Layer-2 (e.g. Ethernet) interface class. */ typedef struct _vnet_hw_interface_class @@ -206,6 +241,9 @@ typedef struct _vnet_hw_interface_class /* Class name (e.g. "Ethernet"). */ char *name; + /* Flags */ + vnet_hw_interface_class_flags_t flags; + /* Function to call when hardware interface is added/deleted. */ vnet_interface_function_t *interface_add_del_function; @@ -233,13 +271,16 @@ typedef struct _vnet_hw_interface_class /* Parser for packet header for e.g. rewrite string. 
*/ unformat_function_t *unformat_header; - /* Forms adjacency for given l3 packet type and destination address. - Returns number of bytes in adjacency. */ - uword (*set_rewrite) (struct vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_packet_type, - void *dst_address, - void *rewrite, uword max_rewrite_bytes); + /* Builds a rewrite string for the interface to the destination + * for the payload/link type. */ + u8 *(*build_rewrite) (struct vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_hw_address); + + /* Update an adjacency added by FIB (as opposed to via the + * neighbour resolution protocol). */ + void (*update_adjacency) (struct vnet_main_t * vnm, + u32 sw_if_index, u32 adj_index); uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm, u32 hw_if_index, @@ -255,6 +296,20 @@ typedef struct _vnet_hw_interface_class } vnet_hw_interface_class_t; +/** + * @brief Return a complete, zero-length (aka dummy) rewrite + */ +extern u8 *default_build_rewrite (struct vnet_main_t *vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_hw_address); + +/** + * @brief Default adjacency update function + */ +extern void default_update_adjacency (struct vnet_main_t *vnm, + u32 sw_if_index, u32 adj_index); + #define VNET_HW_INTERFACE_CLASS(x,...) 
\ __VA_ARGS__ vnet_hw_interface_class_t x; \ static void __vnet_add_hw_interface_class_registration_##x (void) \ diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h index f603a03e77b..424e1764c98 100644 --- a/vnet/vnet/interface_funcs.h +++ b/vnet/vnet/interface_funcs.h @@ -128,6 +128,22 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index) return hw->flags; } +always_inline uword +vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index, + vlib_rx_or_tx_t dir) +{ + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + return hw->max_l3_packet_bytes[dir]; +} + +always_inline uword +vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index, + vlib_rx_or_tx_t dir) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return (hw->max_l3_packet_bytes[dir]); +} + always_inline uword vnet_hw_interface_is_link_up (vnet_main_t * vnm, u32 hw_if_index) { diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h index 0d0eb6c9476..8a76a61f17a 100644 --- a/vnet/vnet/ip/format.h +++ b/vnet/vnet/ip/format.h @@ -51,6 +51,7 @@ unformat_function_t unformat_tcp_udp_port; typedef enum format_ip_adjacency_flags_t_ { FORMAT_IP_ADJACENCY_NONE, + FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE, FORMAT_IP_ADJACENCY_DETAIL = (1 << 0), } format_ip_adjacency_flags_t; diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 21973453af7..b0390b8a44a 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -1118,7 +1118,7 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args) s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x", t->fib_index, t->dpo_index, format_ip_adjacency, - vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE, + t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, @@ -1890,6 +1890,13 @@ ip4_arp_inline (vlib_main_t * vm, p0->error = node->errors[drop0 ? 
IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT]; + /* + * the adj has been updated to a rewrite but the node the DPO that got + * us here hasn't - yet. no big deal. we'll drop while we wait. + */ + if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) + continue; + if (drop0) continue; diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index ab0e650bef1..78546120a23 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -375,6 +375,10 @@ void ip6_register_protocol (u32 protocol, u32 node_index); serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main; +void ip6_ethernet_update_adjacency (vnet_main_t * vnm, + u32 sw_if_index, + u32 ai); + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index d48ccad6984..2487af94326 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -916,7 +916,7 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args) s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, @@ -1605,6 +1605,13 @@ ip6_discover_neighbor_inline (vlib_main_t * vm, if (drop0) continue; + /* + * the adj has been updated to a rewrite but the node the DPO that got + * us here hasn't - yet. no big deal. we'll drop while we wait. 
+ */ + if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) + continue; + { u32 bi0 = 0; icmp6_neighbor_solicitation_header_t * h0; @@ -2167,10 +2174,6 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = { .format_trace = format_ip6_forward_next_trace, .sibling_of = "ip6-rewrite", - - .next_nodes = { - [IP6_REWRITE_NEXT_DROP] = "error-drop", - }, }; VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain) diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c index e042385d9fa..3aef2326237 100644 --- a/vnet/vnet/ip/ip6_neighbor.c +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -51,7 +51,7 @@ typedef struct { #define IP6_NEIGHBOR_FLAG_STATIC (1 << 0) #define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0) u64 cpu_time_last_updated; - adj_index_t adj_index; + fib_node_index_t fib_entry_index; } ip6_neighbor_t; /* advertised prefix option */ @@ -267,6 +267,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, { n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ); pool_put (nm->neighbor_pool, n); } @@ -342,48 +343,182 @@ static void set_unset_ip6_neighbor_rpc #endif static void -ip6_nd_mk_complete (ip6_neighbor_t * nbr) +ip6_nbr_probe (ip_adjacency_t *adj) { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = { - .ip6 = nbr->key.ip6_address, - }, - }; - ip6_main_t *im; - u32 fib_index; + icmp6_neighbor_solicitation_header_t * h; + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_interface_address_t * ia; + ip6_address_t * dst, *src; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + vlib_buffer_t * b; + int bogus_length; + vlib_main_t * vm; + u32 bi = 0; + + vm = vlib_get_main(); - im = &ip6_main; - fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; + si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index); + dst = &adj->sub_type.nbr.next_hop.ip6; - /* only once 
please */ - if (ADJ_INDEX_INVALID == nbr->adj_index) + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) { - nbr->adj_index = - adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6, - FIB_LINK_IP6, - &pfx.fp_addr, - nbr->key.sw_if_index, - nbr->link_layer_address); - ASSERT(ADJ_INDEX_INVALID != nbr->adj_index); - - fib_table_entry_update_one_path(fib_index, - &pfx, - FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, - &pfx.fp_addr, - nbr->key.sw_if_index, - ~0, - 1, - MPLS_LABEL_INVALID, - FIB_ROUTE_PATH_FLAG_NONE); + return; + } + src = ip6_interface_address_matching_destination(im, dst, + adj->rewrite_header.sw_if_index, + &ia); + if (! src) + { + return; + } + + h = vlib_packet_template_get_packet(vm, + &im->discover_neighbor_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index); + + h->ip.dst_address.as_u8[13] = dst->as_u8[13]; + h->ip.dst_address.as_u8[14] = dst->as_u8[14]; + h->ip.dst_address.as_u8[15] = dst->as_u8[15]; + h->ip.src_address = src[0]; + h->neighbor.target_address = dst[0]; + + clib_memcpy (h->link_layer_option.ethernet_address, + hi->hw_address, + vec_len(hi->hw_address)); + + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length); + ASSERT(bogus_length == 0); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = + adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ + vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance(b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args(f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node(vm, hi->output_node_index, f); + } +} + +static void +ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr) +{ + adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, + ethernet_build_rewrite (vnet_get_main (), + nbr->key.sw_if_index, + adj_get_link_type(ai), + nbr->link_layer_address)); +} + +static void +ip6_nd_mk_incomplete (adj_index_t ai, ip6_neighbor_t * nbr) +{ + adj_nbr_update_rewrite ( + ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnet_get_main (), + nbr->key.sw_if_index, + adj_get_link_type(ai), + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); +} + +#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \ +{ \ + k.sw_if_index = sw_if_index; \ + k.ip6_address = *addr; \ + k.pad = 0; \ +} + +static ip6_neighbor_t * +ip6_nd_find (u32 sw_if_index, + const ip6_address_t * addr) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_neighbor_t * n = NULL; + ip6_neighbor_key_t k; + uword *p; + + IP6_NBR_MK_KEY(k, sw_if_index, addr); + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p) { + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + } + + return (n); +} + +static adj_walk_rc_t +ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx) +{ + ip6_neighbor_t *nbr = ctx; + + ip6_nd_mk_complete (ai, nbr); + + return (ADJ_WALK_RC_CONTINUE); +} + +static adj_walk_rc_t +ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx) +{ + ip6_neighbor_t *nbr = ctx; + + ip6_nd_mk_incomplete (ai, nbr); + + return (ADJ_WALK_RC_CONTINUE); +} + +void +ip6_ethernet_update_adjacency (vnet_main_t * vnm, + u32 sw_if_index, + u32 ai) +{ + ip6_neighbor_t *nbr; + ip_adjacency_t *adj; + + adj = adj_get (ai); + + nbr = ip6_nd_find (sw_if_index, 
&adj->sub_type.nbr.next_hop.ip6); + + if (NULL != nbr) + { + adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address, + ip6_nd_mk_complete_walk, nbr); } else { - adj_nbr_update_rewrite(nbr->adj_index, - nbr->link_layer_address); + /* + * no matching ND entry. + * construct the rewrite required to for an ND packet, and stick + * that in the adj's pipe to smoke. + */ + adj_nbr_update_rewrite (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnm, + sw_if_index, + VNET_LINK_IP6, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it may + * want to forward traffic sometime soon. Let's send a speculative ND. + * just one. If we were to do periodically that wouldn't be bad either, + * but that's more code than i'm prepared to write at this time for + * relatively little reward. + */ + ip6_nbr_probe (adj); } } @@ -416,8 +551,6 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, k.ip6_address = a[0]; k.pad = 0; - vlib_worker_thread_barrier_sync (vm); - p = mhash_get (&nm->neighbor_index_by_key, &k); if (p) { n = pool_elt_at_index (nm->neighbor_pool, p[0]); @@ -429,11 +562,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, } if (make_new_nd_cache_entry) { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = k.ip6_address, + }, + }; + u32 fib_index; + pool_get (nm->neighbor_pool, n); mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); n->key = k; - n->adj_index = ADJ_INDEX_INVALID; + + clib_memcpy (n->link_layer_address, + link_layer_address, + n_bytes_link_layer_address); + + /* + * create the adj-fib. the entry in the FIB table for and to the peer. 
+ */ + fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index]; + n->fib_entry_index = + fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + n->key.sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } else { @@ -445,20 +607,22 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, link_layer_address, n_bytes_link_layer_address)) return -1; - } - /* Update time stamp and ethernet address. */ - clib_memcpy (n->link_layer_address, - link_layer_address, - n_bytes_link_layer_address); + clib_memcpy (n->link_layer_address, + link_layer_address, + n_bytes_link_layer_address); + } + /* Update time stamp and flags. */ n->cpu_time_last_updated = clib_cpu_time_now (); if (is_static) n->flags |= IP6_NEIGHBOR_FLAG_STATIC; else n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC; - ip6_nd_mk_complete(n); + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, + ip6_nd_mk_complete_walk, n); /* Customer(s) waiting for this address to be resolved? 
*/ p = mhash_get (&nm->pending_resolutions_by_address, a); @@ -507,44 +671,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, } } - vlib_worker_thread_barrier_release(vm); return 0; } -static void -ip6_nd_mk_incomplete (ip6_neighbor_t *nbr) -{ - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = { - .ip6 = nbr->key.ip6_address, - }, - }; - u32 fib_index; - ip6_main_t *im; - - im = &ip6_main; - fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; - - /* - * revert the adj this ND entry sourced to incomplete - */ - adj_nbr_update_rewrite(nbr->adj_index, - NULL); - - /* - * remove the FIB entry the ND entry sourced - */ - fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ); - - /* - * Unlock the adj now that the ARP entry is no longer a source - */ - adj_unlock(nbr->adj_index); - nbr->adj_index = ADJ_INDEX_INVALID; -} - int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -571,8 +700,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, k.ip6_address = a[0]; k.pad = 0; - vlib_worker_thread_barrier_sync (vm); - p = mhash_get (&nm->neighbor_index_by_key, &k); if (p == 0) { @@ -582,12 +709,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, n = pool_elt_at_index (nm->neighbor_pool, p[0]); - ip6_nd_mk_incomplete(n); + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, + ip6_nd_mk_incomplete_walk, + n); + mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ); pool_put (nm->neighbor_pool, n); out: - vlib_worker_thread_barrier_release(vm); return rv; } @@ -3725,11 +3856,9 @@ ethernet_ndp_change_mac (vlib_main_t * vm, u32 sw_if_index) pool_foreach (n, nm->neighbor_pool, ({ if (n->key.sw_if_index == sw_if_index) { - if (ADJ_INDEX_INVALID != n->adj_index) - { - adj_nbr_update_rewrite(n->adj_index, - n->link_layer_address); - } + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, + ip6_nd_mk_complete_walk, n); } })); /* 
*INDENT-ON* */ diff --git a/vnet/vnet/ipsec/ipsec_if.c b/vnet/vnet/ipsec/ipsec_if.c index 13901efe03e..77d5d19a82e 100644 --- a/vnet/vnet/ipsec/ipsec_if.c +++ b/vnet/vnet/ipsec/ipsec_if.c @@ -38,24 +38,23 @@ dummy_interface_tx (vlib_main_t * vm, return frame->n_vectors; } +/* *INDENT-OFF* */ VNET_DEVICE_CLASS (ipsec_device_class, static) = { -.name = "IPSec",.format_device_name = format_ipsec_name,.format_tx_trace = - format_ipsec_if_output_trace,.tx_function = dummy_interface_tx,}; - -static uword -dummy_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) -{ - return 0; -} - + .name = "IPSec", + .format_device_name = format_ipsec_name, + .format_tx_trace = format_ipsec_if_output_trace, + .tx_function = dummy_interface_tx, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (ipsec_hw_class) = { -.name = "IPSec",.set_rewrite = dummy_set_rewrite,}; - + .name = "IPSec", + .build_rewrite = default_build_rewrite, +}; +/* *INDENT-ON* */ static int ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, diff --git a/vnet/vnet/l2tp/l2tp.c b/vnet/vnet/l2tp/l2tp.c index a043483ede4..13854e2ff30 100644 --- a/vnet/vnet/l2tp/l2tp.c +++ b/vnet/vnet/l2tp/l2tp.c @@ -271,21 +271,6 @@ VNET_DEVICE_CLASS (l2tpv3_device_class,static) = { }; /* *INDENT-ON* */ -static uword -dummy_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) -{ - /* - * Conundrum: packets from tun/tap destined for the tunnel - * actually have this rewrite applied. Transit packets do not. - * To make the two cases equivalent, don't generate a - * rewrite here, build the entire header in the fast path. 
- */ - return 0; -} - static u8 * format_l2tp_header_with_length (u8 * s, va_list * args) { @@ -298,7 +283,8 @@ format_l2tp_header_with_length (u8 * s, va_list * args) VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = { .name = "L2TPV3", .format_header = format_l2tp_header_with_length, - .set_rewrite = dummy_set_rewrite, + .build_rewrite = default_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c index 1f527d6e92e..04b8462e4fc 100644 --- a/vnet/vnet/lisp-cp/lisp_types.c +++ b/vnet/vnet/lisp-cp/lisp_types.c @@ -698,7 +698,17 @@ ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2) void ip_address_copy (ip_address_t * dst, const ip_address_t * src) { - clib_memcpy (dst, src, sizeof (ip_address_t)); + if (IP4 == ip_addr_version (src)) + { + /* don't copy any garbage from the union */ + memset (dst, 0, sizeof (*dst)); + dst->ip.v4 = src->ip.v4; + dst->version = IP4; + } + else + { + clib_memcpy (dst, src, sizeof (ip_address_t)); + } } void @@ -714,6 +724,26 @@ ip_address_set (ip_address_t * dst, const void *src, u8 version) ip_addr_version (dst) = version; } +void +ip_address_to_46 (const ip_address_t * addr, + ip46_address_t * a, fib_protocol_t * proto) +{ + *proto = (IP4 == ip_addr_version (addr) ? 
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + switch (*proto) + { + case FIB_PROTOCOL_IP4: + ip46_address_set_ip4 (a, &addr->ip.v4); + break; + case FIB_PROTOCOL_IP6: + a->ip6 = addr->ip.v6; + break; + default: + ASSERT (0); + break; + } +} + static void ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen) { diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h index b37315edf8d..dd7a53ee548 100644 --- a/vnet/vnet/lisp-cp/lisp_types.h +++ b/vnet/vnet/lisp-cp/lisp_types.h @@ -186,6 +186,8 @@ u16 ip_address_size_to_write (ip_address_t * a); u16 ip_address_iana_afi (ip_address_t * a); u8 ip_address_max_len (u8 ver); u32 ip_address_put (u8 * b, ip_address_t * a); +void ip_address_to_46 (const ip_address_t * addr, + ip46_address_t * a, fib_protocol_t * proto); /* LISP AFI codes */ typedef enum diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c index 0b4f7ed92ba..15d203ce30d 100644 --- a/vnet/vnet/lisp-gpe/interface.c +++ b/vnet/vnet/lisp-gpe/interface.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -167,13 +168,6 @@ VNET_DEVICE_CLASS (lisp_gpe_device_class) = { }; /* *INDENT-ON* */ -static uword -dummy_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) -{ - return 0; -} - u8 * format_lisp_gpe_header_with_length (u8 * s, va_list * args) { @@ -200,7 +194,9 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args) VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = { .name = "LISP_GPE", .format_header = format_lisp_gpe_header_with_length, - .set_rewrite = dummy_set_rewrite, + .build_rewrite = default_build_rewrite, + .build_rewrite = lisp_gpe_build_rewrite, + .update_adjacency = lisp_gpe_update_adjacency, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c index d042f116dea..93a6d01326a 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ 
b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -18,7 +18,8 @@ * */ -#include +#include +#include #include #include #include @@ -106,30 +107,99 @@ lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } +static void +ip46_address_to_ip_address (const ip46_address_t * a, ip_address_t * b) +{ + if (ip46_address_is_ip4 (a)) + { + memset (b, 0, sizeof (*b)); + ip_address_set (b, &a->ip4, IP4); + } + else + { + ip_address_set (b, &a->ip6, IP6); + } +} + /** * @brief Stack the tunnel's midchain on the IP forwarding chain of the via */ static void -lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj) +lisp_gpe_adj_stack_one (lisp_gpe_adjacency_t * ladj, adj_index_t ai) { const lisp_gpe_tunnel_t *lgt; dpo_id_t tmp = DPO_NULL; - fib_link_t linkt; lgt = lisp_gpe_tunnel_get (ladj->tunnel_index); fib_entry_contribute_forwarding (lgt->fib_entry_index, lisp_gpe_adj_get_fib_chain_type (ladj), &tmp); - FOR_EACH_FIB_LINK (linkt) - { - if (FIB_LINK_MPLS == linkt) - continue; - adj_nbr_midchain_stack (ladj->adjs[linkt], &tmp); - } + if (DPO_LOAD_BALANCE == tmp.dpoi_type) + { + /* + * post LISP rewrite we will load-balance. However, the LISP encap + * is always the same for this adjacency/tunnel and hence the IP/UDP src,dst + * hash is always the same result too. So we do that hash now and + * stack on the choice. + * If the choice is an incomplete adj then we will need a poke when + * it becomes complete. This happens since the adj update walk propagates + * as far as recursive paths. 
+ */ + const dpo_id_t *choice; + load_balance_t *lb; + int hash; + + lb = load_balance_get (tmp.dpoi_index); + + if (IP4 == ip_addr_version (&ladj->remote_rloc)) + { + hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + } + else + { + hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + } + + choice = + load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1); + dpo_copy (&tmp, choice); + } + + adj_nbr_midchain_stack (ai, &tmp); dpo_reset (&tmp); } +/** + * @brief Callback when restacking all adjacencies on a LISP-GPE interface + */ +static adj_walk_rc_t +lisp_gpe_adj_walk_cb (adj_index_t ai, void *ctx) +{ + lisp_gpe_adjacency_t *ladj = ctx; + + lisp_gpe_adj_stack_one (ladj, ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +static void +lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj) +{ + fib_protocol_t nh_proto; + ip46_address_t nh; + + ip_address_to_46 (&ladj->remote_rloc, &nh, &nh_proto); + + /* + * walk all the adjacencies on the LISP interface and restack them + */ + adj_nbr_walk_nh (ladj->sw_if_index, + nh_proto, &nh, lisp_gpe_adj_walk_cb, ladj); +} + static lisp_gpe_next_protocol_e lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt) { @@ -157,10 +227,59 @@ lisp_gpe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b) ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b))); } +/** + * @brief The LISP-GPE interface registered function to update, i.e. + * provide a rewrite string for, an adjacency. 
+ */ +void +lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) +{ + const lisp_gpe_tunnel_t *lgt; + lisp_gpe_adjacency_t *ladj; + ip_adjacency_t *adj; + ip_address_t rloc; + vnet_link_t linkt; + index_t lai; + + adj = adj_get (ai); + ip46_address_to_ip_address (&adj->sub_type.nbr.next_hop, &rloc); + + /* + * find an existing or create a new adj + */ + lai = lisp_adj_find (&rloc, sw_if_index); + + ASSERT (INDEX_INVALID != lai); + + ladj = pool_elt_at_index (lisp_adj_pool, lai); + lgt = lisp_gpe_tunnel_get (ladj->tunnel_index); + linkt = adj_get_link_type (ai); + + adj_nbr_midchain_update_rewrite + (ai, lisp_gpe_fixup, + (VNET_LINK_ETHERNET == linkt ? + ADJ_MIDCHAIN_FLAG_NO_COUNT : + ADJ_MIDCHAIN_FLAG_NONE), + lisp_gpe_tunnel_build_rewrite + (lgt, ladj, lisp_gpe_adj_proto_from_fib_link_type (linkt))); + + lisp_gpe_adj_stack_one (ladj, ai); +} + +u8 * +lisp_gpe_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) +{ + ASSERT (0); + return (NULL); +} + index_t lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, u32 overlay_table_id, u32 vni) { + const lisp_gpe_sub_interface_t *l3s; const lisp_gpe_tunnel_t *lgt; lisp_gpe_adjacency_t *ladj; index_t lai, l3si; @@ -171,29 +290,24 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc, overlay_table_id, vni); + l3s = lisp_gpe_sub_interface_get (l3si); /* * find an existing or create a new adj */ - lai = lisp_adj_find (&pair->rmt_loc, l3si); + lai = lisp_adj_find (&pair->rmt_loc, l3s->sw_if_index); if (INDEX_INVALID == lai) { - const lisp_gpe_sub_interface_t *l3s; - u8 *rewrite = NULL; - fib_link_t linkt; - fib_prefix_t nh; pool_get (lisp_adj_pool, ladj); memset (ladj, 0, sizeof (*ladj)); lai = (ladj - lisp_adj_pool); - ladj->remote_rloc = pair->rmt_loc; + ip_address_copy (&ladj->remote_rloc, &pair->rmt_loc); ladj->vni = vni; /* transfer 
the lock to the adj */ ladj->lisp_l3_sub_index = l3si; - - l3s = lisp_gpe_sub_interface_get (l3si); ladj->sw_if_index = l3s->sw_if_index; /* if vni is non-default */ @@ -219,38 +333,8 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index, FIB_NODE_TYPE_LISP_ADJ, lai); - ip_address_to_fib_prefix (&pair->rmt_loc, &nh); - - /* - * construct and stack the FIB midchain adjacencies - */ - FOR_EACH_FIB_LINK (linkt) - { - if (FIB_LINK_MPLS == linkt) - continue; - - ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto, - linkt, - &nh.fp_addr, - ladj->sw_if_index); - - rewrite = - lisp_gpe_tunnel_build_rewrite (lgt, ladj, - lisp_gpe_adj_proto_from_fib_link_type - (linkt)); - adj_nbr_midchain_update_rewrite (ladj->adjs[linkt], - lisp_gpe_fixup, - (FIB_LINK_ETHERNET == linkt ? - ADJ_MIDCHAIN_FLAG_NO_COUNT : - ADJ_MIDCHAIN_FLAG_NONE), rewrite); - - vec_free (rewrite); - } - - lisp_gpe_adj_stack (ladj); - - lisp_adj_insert (&ladj->remote_rloc, ladj->lisp_l3_sub_index, lai); + lisp_adj_insert (&ladj->remote_rloc, ladj->sw_if_index, lai); } else { @@ -278,15 +362,21 @@ lisp_gpe_adjacency_from_fib_node (const fib_node_t * node) static void lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj) { + const lisp_gpe_tunnel_t *lgt; + /* * no children so we are not counting locks. no-op. 
* at least not counting */ - lisp_adj_remove (&ladj->remote_rloc, ladj->lisp_l3_sub_index); + lisp_adj_remove (&ladj->remote_rloc, ladj->sw_if_index); /* * unlock the resources this adj holds */ + lgt = lisp_gpe_tunnel_get (ladj->tunnel_index); + + fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index); + lisp_gpe_tunnel_unlock (ladj->tunnel_index); lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index); @@ -375,9 +465,9 @@ format_lisp_gpe_adjacency (u8 * s, va_list * args) s = format (s, " %U\n", format_lisp_gpe_tunnel, lisp_gpe_tunnel_get (ladj->tunnel_index)); - s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n", - ladj->adjs[FIB_LINK_IP4], - ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]); + /* s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n", */ + /* ladj->adjs[FIB_LINK_IP4], */ + /* ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]); */ } else { diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h index f6a66cddf0b..adc3acaee3c 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h @@ -79,12 +79,6 @@ typedef struct lisp_gpe_adjacency_t_ */ u32 tunnel_index; - /** - * Per-link-type FIB adjacencies contributed. - * These will be used as a result of a FIB lookup. - */ - adj_index_t adjs[FIB_LINK_NUM]; - /** * This adjacency is a child of the FIB entry to reach the RLOC. 
* This is so when the reachability of that RLOC changes, we can restack @@ -112,6 +106,14 @@ extern void lisp_gpe_adjacency_unlock (index_t l3si); extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t l3si); +extern void lisp_gpe_update_adjacency (vnet_main_t * vnm, + u32 sw_if_index, adj_index_t ai); +extern u8 *lisp_gpe_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address); + + /** * @brief Flags for displaying the adjacency */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index 3bd83dca87c..75db97d9b53 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -190,26 +190,6 @@ ip_src_fib_add_route_w_dpo (u32 src_fib_index, } } -static void -ip_address_to_46 (const ip_address_t * addr, - ip46_address_t * a, fib_protocol_t * proto) -{ - *proto = (IP4 == ip_addr_version (addr) ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - switch (*proto) - { - case FIB_PROTOCOL_IP4: - a->ip4 = addr->ip.v4; - break; - case FIB_PROTOCOL_IP6: - a->ip6 = addr->ip.v6; - break; - default: - ASSERT (0); - break; - } -} - static fib_route_path_t * lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) { diff --git a/vnet/vnet/llc/llc.c b/vnet/vnet/llc/llc.c index 0496e8d48ce..975207b651d 100644 --- a/vnet/vnet/llc/llc.c +++ b/vnet/vnet/llc/llc.c @@ -155,31 +155,30 @@ unformat_llc_header (unformat_input_t * input, va_list * args) return 1; } -static uword -llc_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) +static u8 * +llc_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) { - llc_header_t *h = rewrite; + llc_header_t *h; + u8 *rewrite = NULL; llc_protocol_t protocol; - if (max_rewrite_bytes < sizeof (h[0])) - return 0; - - switch (l3_type) + switch (link_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: 
protocol = LLC_PROTOCOL_##b; break +#define _(a,b) case VNET_LINK_##a: protocol = LLC_PROTOCOL_##b; break _(IP4, ip4); #undef _ default: - return 0; + return (NULL); } + vec_validate (rewrite, sizeof (*h) - 1); + h = (llc_header_t *) rewrite; h->src_sap = h->dst_sap = protocol; h->control = 0x3; - return sizeof (h[0]); + return (rewrite); } /* *INDENT-OFF* */ @@ -187,7 +186,7 @@ VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = { .name = "LLC", .format_header = format_llc_header_with_length, .unformat_header = unformat_llc_header, - .set_rewrite = llc_set_rewrite, + .build_rewrite = llc_build_rewrite, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/mpls/interface.c b/vnet/vnet/mpls/interface.c index dd40fc29e25..553d7fb5f82 100644 --- a/vnet/vnet/mpls/interface.c +++ b/vnet/vnet/mpls/interface.c @@ -23,22 +23,6 @@ #include #include -static uword mpls_gre_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) -{ - /* - * Conundrum: packets from tun/tap destined for the tunnel - * actually have this rewrite applied. Transit packets do not. - * To make the two cases equivalent, don't generate a - * rewrite here, build the entire header in the fast path. - */ - return 0; -} - /* manually added to the interface output node */ #define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE 1 @@ -271,26 +255,10 @@ VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = { #if 0 .unformat_header = unformat_mpls_gre_header, #endif - .set_rewrite = mpls_gre_set_rewrite, + .build_rewrite = default_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; - -static uword mpls_eth_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) -{ - /* - * Conundrum: packets from tun/tap destined for the tunnel - * actually have this rewrite applied. Transit packets do not. 
- * To make the two cases equivalent, don't generate a - * rewrite here, build the entire header in the fast path. - */ - return 0; -} - /* manually added to the interface output node */ #define MPLS_ETH_OUTPUT_NEXT_OUTPUT 1 @@ -525,7 +493,8 @@ VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = { #if 0 .unformat_header = unformat_mpls_eth_header, #endif - .set_rewrite = mpls_eth_set_rewrite, + .build_rewrite = default_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /** @@ -609,7 +578,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, u32 sw_if_index, u8 is_enable) { - mpls_interface_state_change_callback_t *callback; vlib_main_t * vm = vlib_get_main(); ip_config_main_t * cm = &mm->feature_config_mains[VNET_IP_RX_UNICAST_FEAT]; vnet_config_main_t * vcm = &cm->config_main; @@ -660,14 +628,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, /* # bytes of config data */ 0); cm->config_index_by_sw_if_index[sw_if_index] = ci; - - /* - * notify all interested clients of the change of state. - */ - vec_foreach(callback, mm->mpls_interface_state_change_callbacks) - { - (*callback)(sw_if_index, is_enable); - } } static mpls_gre_tunnel_t * @@ -1578,7 +1538,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, vnet_rewrite_for_sw_interface (vnm, - VNET_L3_PACKET_TYPE_MPLS_UNICAST, + VNET_LINK_MPLS, tx_sw_if_index, ip4_rewrite_node.index, tp->tunnel_dst, @@ -1763,7 +1723,7 @@ int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, /* Build L2 encap */ vnet_rewrite_for_sw_interface (mm->vnet_main, - VNET_L3_PACKET_TYPE_MPLS_UNICAST, + VNET_LINK_MPLS, t->tx_sw_if_index, mpls_policy_encap_node.index, t->tunnel_dst, diff --git a/vnet/vnet/mpls/mpls.h b/vnet/vnet/mpls/mpls.h index e4737709561..35755333e1d 100644 --- a/vnet/vnet/mpls/mpls.h +++ b/vnet/vnet/mpls/mpls.h @@ -163,9 +163,6 @@ typedef struct { /* IP4 enabled count by software interface */ u8 * mpls_enabled_by_sw_if_index; - /* Functions to call when MPLS state on an interface changes. 
*/ - mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks; - /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c index 932fcb8d0bd..299e1dd242a 100644 --- a/vnet/vnet/mpls/mpls_output.c +++ b/vnet/vnet/mpls/mpls_output.c @@ -39,7 +39,7 @@ format_mpls_output_trace (u8 * s, va_list * args) s = format (s, "adj-idx %d : %U flow hash: 0x%08x", t->adj_index, - format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, diff --git a/vnet/vnet/pg/stream.c b/vnet/vnet/pg/stream.c index b66fb742ab4..20f54a74b8c 100644 --- a/vnet/vnet/pg/stream.c +++ b/vnet/vnet/pg/stream.c @@ -121,25 +121,25 @@ VNET_DEVICE_CLASS (pg_dev_class) = { }; /* *INDENT-ON* */ -static uword -pg_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) +static u8 * +pg_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) { - u16 *h = rewrite; + u8 *rewrite = NULL; + u16 *h; - if (max_rewrite_bytes < sizeof (h[0])) - return 0; + vec_validate (rewrite, sizeof (*h) - 1); + h = (u16 *) rewrite; + h[0] = clib_host_to_net_u16 (vnet_link_to_l3_proto (link_type)); - h[0] = clib_host_to_net_u16 (l3_type); - return sizeof (h[0]); + return (rewrite); } /* *INDENT-OFF* */ VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = { .name = "Packet generator", - .set_rewrite = pg_set_rewrite, + .build_rewrite = pg_build_rewrite, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/ppp/ppp.c b/vnet/vnet/ppp/ppp.c index 427fd7b7a79..a0eefbadc3b 100644 --- a/vnet/vnet/ppp/ppp.c +++ b/vnet/vnet/ppp/ppp.c @@ -168,35 +168,33 @@ unformat_ppp_header (unformat_input_t * input, va_list * args) return 1; } -static uword -ppp_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, 
- u32 l3_type, - void *dst_address, void *rewrite, uword max_rewrite_bytes) +static u8 * +ppp_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_hw_address) { - ppp_header_t *h = rewrite; + ppp_header_t *h; + u8 *rewrite = NULL; ppp_protocol_t protocol; - if (max_rewrite_bytes < sizeof (h[0])) - return 0; - - switch (l3_type) + switch (link_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = PPP_PROTOCOL_##b; break +#define _(a,b) case VNET_LINK_##a: protocol = PPP_PROTOCOL_##b; break _(IP4, ip4); _(IP6, ip6); - _(MPLS_UNICAST, mpls_unicast); - _(MPLS_MULTICAST, mpls_multicast); + _(MPLS, mpls_unicast); #undef _ default: - return 0; + return (NULL); } + vec_validate (rewrite, sizeof (*h) - 1); + h = (ppp_header_t *) rewrite; h->address = 0xff; h->control = 0x03; h->protocol = clib_host_to_net_u16 (protocol); - return sizeof (h[0]); + return (rewrite); } /* *INDENT-OFF* */ @@ -204,7 +202,8 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = { .name = "PPP", .format_header = format_ppp_header_with_length, .unformat_header = unformat_ppp_header, - .set_rewrite = ppp_set_rewrite, + .build_rewrite = ppp_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/replication.c b/vnet/vnet/replication.c index 571be7d807c..561c86cdfa4 100644 --- a/vnet/vnet/replication.c +++ b/vnet/vnet/replication.c @@ -225,6 +225,12 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) /* Mark that this buffer was just recycled */ b0->flags |= VLIB_BUFFER_IS_RECYCLED; +#if (CLIB_DEBUG > 0) +#if DPDK == 0 + vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); +#endif +#endif + /* If buffer is traced, mark frame as traced */ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) f->flags |= VLIB_FRAME_TRACE; diff --git a/vnet/vnet/rewrite.c b/vnet/vnet/rewrite.c index 42d0688a5cc..53d548bc8ae 100644 --- a/vnet/vnet/rewrite.c +++ 
b/vnet/vnet/rewrite.c @@ -184,38 +184,51 @@ done: return error == 0; } +u32 +vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return (hw->output_node_index); +} + +void +vnet_rewrite_init (vnet_main_t * vnm, + u32 sw_if_index, + u32 this_node, u32 next_node, vnet_rewrite_header_t * rw) +{ + rw->sw_if_index = sw_if_index; + rw->node_index = this_node; + rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node); + rw->max_l3_packet_bytes = + vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX); +} + void vnet_rewrite_for_sw_interface (vnet_main_t * vnm, - vnet_l3_packet_type_t packet_type, + vnet_link_t link_type, u32 sw_if_index, u32 node_index, void *dst_address, vnet_rewrite_header_t * rw, u32 max_rewrite_bytes) { + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); vnet_hw_interface_class_t *hc = vnet_get_hw_interface_class (vnm, hw->hw_class_index); - static u8 *rw_tmp = 0; - uword n_rw_tmp; + u8 *rewrite = NULL; - rw->sw_if_index = sw_if_index; - rw->node_index = node_index; - rw->next_index = - vlib_node_add_next (vnm->vlib_main, node_index, hw->output_node_index); - rw->max_l3_packet_bytes = hw->max_l3_packet_bytes[VLIB_TX]; - - ASSERT (max_rewrite_bytes > 0); - vec_reset_length (rw_tmp); - vec_validate (rw_tmp, max_rewrite_bytes - 1); - - ASSERT (hc->set_rewrite); - n_rw_tmp = - hc->set_rewrite (vnm, sw_if_index, packet_type, dst_address, rw_tmp, - max_rewrite_bytes); - - ASSERT (n_rw_tmp < max_rewrite_bytes); - vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rw_tmp, n_rw_tmp); + vnet_rewrite_init (vnm, sw_if_index, node_index, + vnet_tx_node_index_for_sw_interface (vnm, sw_if_index), + rw); + + ASSERT (hc->build_rewrite); + rewrite = hc->build_rewrite (vnm, sw_if_index, link_type, dst_address); + + ASSERT (vec_len (rewrite) < max_rewrite_bytes); + vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite, 
+ vec_len (rewrite)); + vec_free (rewrite); } void @@ -280,6 +293,33 @@ unserialize_vnet_rewrite (serialize_main_t * m, va_list * va) rw->data_bytes); } +u8 * +vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + + ASSERT (hc->build_rewrite); + return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address)); +} + + +void +vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm, + u32 sw_if_index, u32 ai) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + + ASSERT (hc->update_adjacency); + hc->update_adjacency (vnm, sw_if_index, ai); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/vnet/vnet/rewrite.h b/vnet/vnet/rewrite.h index fb800da7b37..00c1efbdc5e 100644 --- a/vnet/vnet/rewrite.h +++ b/vnet/vnet/rewrite.h @@ -88,6 +88,16 @@ struct { \ u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)]; \ } +always_inline void +vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size) +{ + /* Sanity check values carefully for this memset operation */ + ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE)); + + rw->data_bytes = 0; + memset (rw->data, 0xfe, max_size); +} + always_inline void vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw, int max_size, void *data, int data_bytes) @@ -252,20 +262,29 @@ _vnet_rewrite_two_headers (vnet_rewrite_header_t * h0, (most_likely_size)) #define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0) +/** Deprecated */ void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm, - vnet_l3_packet_type_t packet_type, + vnet_link_t packet_type, u32 sw_if_index, u32 node_index, void *dst_address, 
vnet_rewrite_header_t * rw, u32 max_rewrite_bytes); -void vnet_rewrite_for_tunnel (struct vnet_main_t *vnm, - u32 tx_sw_if_index, - u32 rewrite_node_index, - u32 post_rewrite_node_index, - vnet_rewrite_header_t * rw, - u8 * rewrite_data, u32 rewrite_length); +u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index); + +void vnet_rewrite_init (struct vnet_main_t *vnm, + u32 sw_if_index, + u32 this_node, + u32 next_node, vnet_rewrite_header_t * rw); + +u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index, + vnet_link_t packet_type, + const void *dst_address); +void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index, u32 ai); /* Parser for unformat header & rewrite string. */ unformat_function_t unformat_vnet_rewrite; diff --git a/vnet/vnet/srp/interface.c b/vnet/vnet/srp/interface.c index 46c091d490c..d427cc3c523 100644 --- a/vnet/vnet/srp/interface.c +++ b/vnet/vnet/srp/interface.c @@ -41,34 +41,33 @@ #include #include -static uword srp_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) +static u8* +srp_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void * dst_address) { vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); srp_main_t * sm = &srp_main; - srp_and_ethernet_header_t * h = rewrite; + srp_and_ethernet_header_t * h; + u8* rewrite = NULL; u16 type; uword n_bytes = sizeof (h[0]); - if (n_bytes > max_rewrite_bytes) - return 0; - - switch (l3_type) { -#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break + switch (link_type) { +#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _ (IP4, IP4); _ (IP6, IP6); - _ (MPLS_UNICAST, MPLS_UNICAST); - _ (MPLS_MULTICAST, MPLS_MULTICAST); + _ (MPLS, MPLS_UNICAST); _ (ARP, ARP); #undef _ default: - return 0; + return (NULL); } + vec_validate(rewrite, 
n_bytes-1); + h = (srp_and_ethernet_header_t *)rewrite; + clib_memcpy (h->ethernet.src_address, hw->hw_address, sizeof (h->ethernet.src_address)); if (dst_address) clib_memcpy (h->ethernet.dst_address, dst_address, sizeof (h->ethernet.dst_address)); @@ -82,7 +81,7 @@ static uword srp_set_rewrite (vnet_main_t * vnm, h->srp.ttl = sm->default_data_ttl; srp_header_compute_parity (&h->srp); - return n_bytes; + return (rewrite); } static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute); @@ -293,7 +292,8 @@ VNET_HW_INTERFACE_CLASS (srp_hw_interface_class) = { .format_device = format_srp_device, .unformat_hw_address = unformat_ethernet_address, .unformat_header = unformat_srp_header, - .set_rewrite = srp_set_rewrite, + .build_rewrite = srp_build_rewrite, + .update_adjacency = ethernet_update_adjacency, .is_valid_class_for_interface = srp_is_valid_class_for_interface, .hw_class_change = srp_interface_hw_class_change, }; diff --git a/vnet/vnet/unix/tapcli.c b/vnet/vnet/unix/tapcli.c index 0be68a9eae2..8a5d47ddb96 100644 --- a/vnet/vnet/unix/tapcli.c +++ b/vnet/vnet/unix/tapcli.c @@ -585,6 +585,7 @@ tapcli_nopunt_frame (vlib_main_t * vm, VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = { .name = "tapcli", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /** diff --git a/vnet/vnet/unix/tuntap.c b/vnet/vnet/unix/tuntap.c index 48d5dc2387d..89fd1dcfe81 100644 --- a/vnet/vnet/unix/tuntap.c +++ b/vnet/vnet/unix/tuntap.c @@ -946,6 +946,7 @@ tuntap_nopunt_frame (vlib_main_t * vm, VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = { .name = "tuntap", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /** diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.c b/vnet/vnet/vxlan-gpe/vxlan_gpe.c index fae481c36bb..979864e9092 100644 --- a/vnet/vnet/vxlan-gpe/vxlan_gpe.c +++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.c @@ -121,17 +121,6 @@ VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = { .admin_up_down_function = vxlan_gpe_interface_admin_up_down, }; -static 
uword dummy_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) -{ - return 0; -} - - /** * @brief Formatting function for tracing VXLAN GPE with length * @@ -151,7 +140,8 @@ static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args) VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = { .name = "VXLAN_GPE", .format_header = format_vxlan_gpe_header_with_length, - .set_rewrite = dummy_set_rewrite, + .build_rewrite = default_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c index da359a8d1bb..5b521dbeeb8 100644 --- a/vnet/vnet/vxlan/vxlan.c +++ b/vnet/vnet/vxlan/vxlan.c @@ -103,16 +103,6 @@ VNET_DEVICE_CLASS (vxlan_device_class,static) = { .admin_up_down_function = vxlan_interface_admin_up_down, }; -static uword dummy_set_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - u32 l3_type, - void * dst_address, - void * rewrite, - uword max_rewrite_bytes) -{ - return 0; -} - static u8 * format_vxlan_header_with_length (u8 * s, va_list * args) { u32 dev_instance = va_arg (*args, u32); @@ -123,7 +113,7 @@ static u8 * format_vxlan_header_with_length (u8 * s, va_list * args) VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = { .name = "VXLAN", .format_header = format_vxlan_header_with_length, - .set_rewrite = dummy_set_rewrite, + .build_rewrite = default_build_rewrite, }; #define foreach_copy_field \ diff --git a/vpp/vpp-api/api.c b/vpp/vpp-api/api.c index ca56ab24480..b0e0412b127 100644 --- a/vpp/vpp-api/api.c +++ b/vpp/vpp-api/api.c @@ -3211,7 +3211,9 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, mp->hostname[vec_len (hostname) + 1] = '\n'; clib_memcpy (&mp->host_address[0], host_address, 16); clib_memcpy (&mp->router_address[0], router_address, 16); - clib_memcpy (&mp->host_mac[0], host_mac, 6); + + if (NULL != host_mac) + clib_memcpy (&mp->host_mac[0], host_mac, 6); mp->_vl_msg_id = ntohs 
(VL_API_DHCP_COMPL_EVENT);