X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fip%2Fip4_forward.c;h=4b019bce8deb3eac02b5981496dab09dac567b35;hb=f3f25416900288d86511133b4d47e68ccf359772;hp=720450f01abefb53634cf8501669ba1b14adeac0;hpb=f1213b82771ce929c076339c24a777cfd59690e6;p=vpp.git diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 720450f01ab..4b019bce8de 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -39,11 +39,19 @@ #include #include -#include /* for ethernet_header_t */ -#include /* for ethernet_arp_header_t */ +/** for ethernet_header_t */ +#include +/** for ethernet_arp_header_t */ +#include #include -#include /* for srp_hw_interface_class */ -#include /* for API error numbers */ +/** for srp_hw_interface_class */ +#include +/** for API error numbers */ +#include + +/** @file + vnet ip4 forwarding +*/ /* This is really, really simple but stupid fib. */ u32 @@ -71,7 +79,7 @@ ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, goto done; } } - + /* Nothing matches in table. */ ai = lm->miss_adj_index; @@ -79,6 +87,11 @@ ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, return ai; } +/** @brief Create FIB from table ID and init all hashing. 
+ @param im - @ref ip4_main_t + @param table_id - table ID + @return fib - @ref ip4_fib_t +*/ static ip4_fib_t * create_fib_with_table_id (ip4_main_t * im, u32 table_id) { @@ -87,6 +100,7 @@ create_fib_with_table_id (ip4_main_t * im, u32 table_id) vec_add2 (im->fibs, fib, 1); fib->table_id = table_id; fib->index = fib - im->fibs; + /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */ fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; fib->fwd_classify_table_index = ~0; fib->rev_classify_table_index = ~0; @@ -94,20 +108,41 @@ create_fib_with_table_id (ip4_main_t * im, u32 table_id) return fib; } +/** @brief Find existing or Create new FIB based on index + @param im @ref ip4_main_t + @param table_index_or_id - overloaded parameter referring + to the table or a table's index in the FIB vector + @param flags - used to check if table_index_or_id was a table or + an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX) + @return either the existing or a new ip4_fib_t entry +*/ ip4_fib_t * -find_ip4_fib_by_table_index_or_id (ip4_main_t * im, +find_ip4_fib_by_table_index_or_id (ip4_main_t * im, u32 table_index_or_id, u32 flags) { uword * p, fib_index; fib_index = table_index_or_id; + /* If this isn't a FIB_INDEX ... */ if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX)) { + /* If passed ~0 then request the next table available */ + if (table_index_or_id == ~0) { + table_index_or_id = 0; + while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) { + table_index_or_id++; + } + /* Create the next table and return the ip4_fib_t associated with it */ + return create_fib_with_table_id (im, table_index_or_id); + } + /* A specific table_id was requested.. */ p = hash_get (im->fib_index_by_table_id, table_index_or_id); + /* ... and if it doesn't exist create it else grab its index */ if (! 
p) return create_fib_with_table_id (im, table_index_or_id); fib_index = p[0]; } + /* Return the ip4_fib_t associated with this index */ return vec_elt_at_index (im->fibs, fib_index); } @@ -181,7 +216,12 @@ ip4_fib_set_adj_index (ip4_main_t * im, fib->new_hash_values); p = hash_get (hash, dst_address_u32); - clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values)); + /* hash_get should never return NULL here */ + if (p) + clib_memcpy (p, fib->new_hash_values, + vec_bytes (fib->new_hash_values)); + else + ASSERT(0); } } @@ -242,7 +282,9 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) old_adj_index = fib->old_hash_values[0]; /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) + if (old_adj_index == adj_index + && adj_index != ~0 + && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) { ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); if (adj->share_count > 0) @@ -260,6 +302,85 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) ip_del_adjacency (lm, old_adj_index); } + +u32 +ip4_route_get_next_hop_adj (ip4_main_t * im, + u32 fib_index, + ip4_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 explicit_fib_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + vnet_main_t * vnm = vnet_get_main(); + uword * nh_hash, * nh_result; + int is_interface_next_hop; + u32 nh_adj_index; + ip4_fib_t * fib; + + fib = vec_elt_at_index (im->fibs, fib_index); + + is_interface_next_hop = next_hop->data_u32 == 0; + if (is_interface_next_hop) + { + nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index); + if (nh_result) + nh_adj_index = *nh_result; + else + { + ip_adjacency_t * adj; + adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, + &nh_adj_index); + ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0); + ip_call_add_del_adjacency_callbacks 
(lm, nh_adj_index, /* is_del */ 0); + hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index); + } + } + else if (next_hop_sw_if_index == ~0) + { + /* next-hop is recursive. we always need an indirect adj + * for recursive paths. Any LPM we perform now will give + * us a valid adj, but without tracking the next-hop we + * have no way to keep it valid. + */ + ip_adjacency_t add_adj; + memset (&add_adj, 0, sizeof(add_adj)); + add_adj.n_adj = 1; + add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; + add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32; + add_adj.explicit_fib_index = explicit_fib_index; + ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); + } + else + { + nh_hash = fib->adj_index_by_dst_address[32]; + nh_result = hash_get (nh_hash, next_hop->data_u32); + + /* Next hop must be known. */ + if (! nh_result) + { + ip_adjacency_t * adj; + + /* no /32 exists, get the longest prefix match */ + nh_adj_index = ip4_fib_lookup_with_table (im, fib_index, + next_hop, 0); + adj = ip_get_adjacency (lm, nh_adj_index); + /* if ARP interface adjacency is present, we need to + install ARP adjacency for specific next hop */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && + adj->arp.next_hop.ip4.as_u32 == 0) + { + nh_adj_index = vnet_arp_glean_add(fib_index, next_hop); + } + } + else + { + nh_adj_index = *nh_result; + } + } + + return (nh_adj_index); +} + void ip4_add_del_route_next_hop (ip4_main_t * im, u32 flags, @@ -277,11 +398,9 @@ ip4_add_del_route_next_hop (ip4_main_t * im, u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index; u32 dst_adj_index, nh_adj_index; uword * dst_hash, * dst_result; - uword * nh_hash, * nh_result; ip_adjacency_t * dst_adj; ip_multipath_adjacency_t * old_mp, * new_mp; int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0; - int is_interface_next_hop; clib_error_t * error = 0; if (explicit_fib_index == (u32)~0) @@ -290,57 +409,14 @@ ip4_add_del_route_next_hop (ip4_main_t * im, fib_index =
explicit_fib_index; fib = vec_elt_at_index (im->fibs, fib_index); - + /* Lookup next hop to be added or deleted. */ - is_interface_next_hop = next_hop->data_u32 == 0; if (adj_index == (u32)~0) { - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0); - ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index); - } - } - else - { - nh_hash = fib->adj_index_by_dst_address[32]; - nh_result = hash_get (nh_hash, next_hop->data_u32); - - /* Next hop must be known. */ - if (! nh_result) - { - ip_adjacency_t * adj; - - nh_adj_index = ip4_fib_lookup_with_table (im, fib_index, - next_hop, 0); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ARP interface adjacencty is present, we need to - install ARP adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip4.as_u32 == 0) - { - nh_adj_index = vnet_arp_glean_add(fib_index, next_hop); - } - else - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB; - error = clib_error_return (0, "next-hop %U/32 not in FIB", - format_ip4_address, next_hop); - goto done; - } - } - else - nh_adj_index = *nh_result; - } + nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index, + next_hop, + next_hop_sw_if_index, + explicit_fib_index); } else { @@ -389,8 +465,9 @@ ip4_add_del_route_next_hop (ip4_main_t * im, to existing non-multipath adjacency */ if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) { - /* create new adjacency */ + /* create / delete additional mapping of existing adjacency */ ip4_add_del_route_args_t a; + 
a.table_index_or_table_id = fib_index; a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) | IP4_ROUTE_FLAG_FIB_INDEX @@ -404,7 +481,6 @@ ip4_add_del_route_next_hop (ip4_main_t * im, a.n_add_adj = 0; ip4_add_del_route (im, &a); - goto done; } @@ -432,6 +508,8 @@ ip4_add_del_route_next_hop (ip4_main_t * im, if (old_mp != new_mp) { ip4_add_del_route_args_t a; + ip_adjacency_t * adj; + a.table_index_or_table_id = fib_index; a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) | IP4_ROUTE_FLAG_FIB_INDEX @@ -444,6 +522,10 @@ ip4_add_del_route_next_hop (ip4_main_t * im, a.n_add_adj = 0; ip4_add_del_route (im, &a); + + adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index); + if (adj->n_adj == 1) + adj->share_count += is_del ? -1 : 1; } done: @@ -620,11 +702,18 @@ void ip4_delete_matching_routes (ip4_main_t * im, ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags); } +void +ip4_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vlib_rx_or_tx_t which_adj_index); + always_inline uword ip4_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets) + int lookup_for_responses_to_locally_received_packets, + int is_indirect) { ip4_main_t * im = &ip4_main; ip_lookup_main_t * lm = &im->lookup_main; @@ -651,6 +740,7 @@ ip4_lookup_inline (vlib_main_t * vm, ip_adjacency_t * adj0, * adj1; ip4_fib_mtrie_t * mtrie0, * mtrie1; ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_address_t * dst_addr0, *dst_addr1; __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1; u32 flow_hash_config0, flow_hash_config1; @@ -680,6 +770,20 @@ ip4_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); + if (is_indirect) + { + ip_adjacency_t * iadj0, * iadj1; + iadj0 = ip_get_adjacency (lm, 
vnet_buffer(p0)->ip.adj_index[VLIB_TX]); + iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); + dst_addr0 = &iadj0->indirect.next_hop.ip4; + dst_addr1 = &iadj1->indirect.next_hop.ip4; + } + else + { + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; + } + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? @@ -695,8 +799,8 @@ ip4_lookup_inline (vlib_main_t * vm, leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); } tcp0 = (void *) (ip0 + 1); @@ -709,20 +813,20 @@ ip4_lookup_inline (vlib_main_t * vm, if (! lookup_for_responses_to_locally_received_packets) { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); } if (! lookup_for_responses_to_locally_received_packets) { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); } if (! 
lookup_for_responses_to_locally_received_packets) { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); } if (lookup_for_responses_to_locally_received_packets) @@ -741,10 +845,10 @@ ip4_lookup_inline (vlib_main_t * vm, } ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->dst_address, + dst_addr0, /* no_default_route */ 0)); ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - &ip1->dst_address, + dst_addr1, /* no_default_route */ 0)); adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); @@ -840,6 +944,7 @@ ip4_lookup_inline (vlib_main_t * vm, ip_adjacency_t * adj0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; + ip4_address_t * dst_addr0; __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; u32 flow_hash_config0, hash_c0; @@ -850,6 +955,17 @@ ip4_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); + if (is_indirect) + { + ip_adjacency_t * iadj0; + iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); + dst_addr0 = &iadj0->indirect.next_hop.ip4; + } + else + { + dst_addr0 = &ip0->dst_address; + } + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; @@ -860,7 +976,7 @@ ip4_lookup_inline (vlib_main_t * vm, leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); } tcp0 = (void *) (ip0 + 1); @@ -869,13 +985,13 @@ ip4_lookup_inline (vlib_main_t * vm, || ip0->protocol == IP_PROTOCOL_UDP); if (! 
lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); if (! lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); if (! lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); if (lookup_for_responses_to_locally_received_packets) adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; @@ -887,7 +1003,7 @@ ip4_lookup_inline (vlib_main_t * vm, } ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->dst_address, + dst_addr0, /* no_default_route */ 0)); adj0 = ip_get_adjacency (lm, adj_index0); @@ -937,15 +1053,52 @@ ip4_lookup_inline (vlib_main_t * vm, vlib_put_next_frame (vm, node, next, n_left_to_next); } + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace(vm, node, frame, VLIB_TX); + return frame->n_vectors; } +/** @brief IPv4 lookup node. + @node ip4-lookup + + This is the main IPv4 lookup dispatch node. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - vnet_buffer(b)->sw_if_index[VLIB_RX] + - Indicates the @c sw_if_index value of the interface that the + packet was received on. + - vnet_buffer(b)->sw_if_index[VLIB_TX] + - When the value is @c ~0 then the node performs a longest prefix + match (LPM) for the packet destination address in the FIB attached + to the receive interface. + - Otherwise perform LPM for the packet destination address in the + indicated FIB. In this case [VLIB_TX] is a FIB index + value (0, 1, ...) and not a VRF id. 
+ + @em Sets: + - vnet_buffer(b)->ip.adj_index[VLIB_TX] + - The lookup result adjacency index. + + Next Index: + - Dispatches the packet to the node index found in + ip_adjacency_t @c adj->lookup_next_index + (where @c adj is the lookup result adjacency). +*/ static uword ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0); + return ip4_lookup_inline (vm, node, frame, + /* lookup_for_responses_to_locally_received_packets */ 0, + /* is_indirect */ 0); } @@ -974,6 +1127,7 @@ void ip4_adjacency_set_interface_route (vnet_main_t * vnm, node_index = ip4_arp_node.index; adj->if_address_index = if_address_index; adj->arp.next_hop.ip4.as_u32 = 0; + ip46_address_reset(&adj->arp.next_hop); packet_type = VNET_L3_PACKET_TYPE_ARP; } else @@ -1230,6 +1384,97 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); +/* Built-in ip4 unicast rx feature path definition */ +VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = { + .node_name = "ip4-inacl", + .runs_before = {"ip4-source-check-via-rx", 0}, + .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = { + .node_name = "ip4-source-check-via-rx", + .runs_before = {"ip4-source-check-via-any", 0}, + .feature_index = + &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = { + .node_name = "ip4-source-check-via-any", + .runs_before = {"ip4-policer-classify", 0}, + .feature_index = + &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check, static) = { + .node_name = "ip4-source-and-port-range-check", + .runs_before = {"ip4-policer-classify", 0}, + .feature_index = + 
&ip4_main.ip4_unicast_rx_feature_source_and_port_range_check, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = { + .node_name = "ip4-policer-classify", + .runs_before = {"ipsec-input-ip4", 0}, + .feature_index = + &ip4_main.ip4_unicast_rx_feature_policer_classify, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = { + .node_name = "ipsec-input-ip4", + .runs_before = {"vpath-input-ip4", 0}, + .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = { + .node_name = "vpath-input-ip4", + .runs_before = {"ip4-lookup", 0}, + .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath, +}; + +VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = { + .node_name = "ip4-lookup", + .runs_before = {0}, /* not before any other features */ + .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup, +}; + +/* Built-in ip4 multicast rx feature path definition */ +VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { + .node_name = "vpath-input-ip4", + .runs_before = {"ip4-lookup-multicast", 0}, + .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath, +}; + +VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = { + .node_name = "ip4-lookup-multicast", + .runs_before = {0}, /* not before any other features */ + .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup, +}; + +static char * feature_start_nodes[] = + { "ip4-input", "ip4-input-no-checksum"}; + +static clib_error_t * +ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) +{ + ip_lookup_main_t * lm = &im->lookup_main; + clib_error_t * error; + vnet_cast_t cast; + + for (cast = 0; cast < VNET_N_CAST; cast++) + { + ip_config_main_t * cm = &lm->rx_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + if ((error = ip_feature_init_cast (vm, cm, vcm, + feature_start_nodes, + ARRAY_LEN(feature_start_nodes), + cast, + 1 /* is_ip4 */))) + return error; + } + return 0; +} + static clib_error_t * 
ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, @@ -1239,57 +1484,31 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, ip4_main_t * im = &ip4_main; ip_lookup_main_t * lm = &im->lookup_main; u32 ci, cast; + u32 feature_index; for (cast = 0; cast < VNET_N_CAST; cast++) { ip_config_main_t * cm = &lm->rx_config_mains[cast]; vnet_config_main_t * vcm = &cm->config_main; - if (! vcm->node_index_by_feature_index) - { - if (cast == VNET_UNICAST) - { - static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", }; - static char * feature_nodes[] = { - [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl", - [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx", - [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any", - [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4", - [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4", - [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup", - }; - - vnet_config_init (vm, vcm, - start_nodes, ARRAY_LEN (start_nodes), - feature_nodes, ARRAY_LEN (feature_nodes)); - } - else - { - static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", }; - static char * feature_nodes[] = { - [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4", - [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast", - }; - - vnet_config_init (vm, vcm, - start_nodes, ARRAY_LEN (start_nodes), - feature_nodes, ARRAY_LEN (feature_nodes)); - } - } - vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); ci = cm->config_index_by_sw_if_index[sw_if_index]; + if (cast == VNET_UNICAST) + feature_index = im->ip4_unicast_rx_feature_lookup; + else + feature_index = im->ip4_multicast_rx_feature_lookup; + if (is_add) ci = vnet_config_add_feature (vm, vcm, ci, - IP4_RX_FEATURE_LOOKUP, + feature_index, /* config data */ 0, /* # bytes of config data */ 0); else ci = vnet_config_del_feature (vm, vcm, ci, - IP4_RX_FEATURE_LOOKUP, + feature_index, /* config data */ 0, /* # bytes of config data */ 0); @@ -1301,16 +1520,44 @@ ip4_sw_interface_add_del 
(vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); +static u8 * format_ip4_lookup_trace (u8 * s, va_list * args); VLIB_REGISTER_NODE (ip4_lookup_node) = { .function = ip4_lookup, .name = "ip4-lookup", .vector_size = sizeof (u32), - .n_next_nodes = IP_LOOKUP_N_NEXT, + .format_trace = format_ip4_lookup_trace, + + .n_next_nodes = IP4_LOOKUP_N_NEXT, .next_nodes = IP4_LOOKUP_NEXT_NODES, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup) + +static uword +ip4_indirect (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_lookup_inline (vm, node, frame, + /* lookup_for_responses_to_locally_received_packets */ 0, + /* is_indirect */ 1); +} + +VLIB_REGISTER_NODE (ip4_indirect_node) = { + .function = ip4_indirect, + .name = "ip4-indirect", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_ip4_lookup_trace, + + .n_next_nodes = 0, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect) + + /* Global IP4 main. 
*/ ip4_main_t ip4_main; @@ -1318,6 +1565,7 @@ clib_error_t * ip4_lookup_init (vlib_main_t * vm) { ip4_main_t * im = &ip4_main; + clib_error_t * error; uword i; for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) @@ -1368,7 +1616,9 @@ ip4_lookup_init (vlib_main_t * vm) "ip4 arp"); } - return 0; + error = ip4_feature_init (vm, im); + + return error; } VLIB_INIT_FUNCTION (ip4_lookup_init); @@ -1384,33 +1634,52 @@ typedef struct { } ip4_forward_next_trace_t; static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U%U", + format_white_space, indent, + format_ip4_header, t->packet_data); + return s; +} + +static u8 * format_ip4_lookup_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); ip4_main_t * im = &ip4_main; - ip_adjacency_t * adj; uword indent = format_get_indent (s); - adj = ip_get_adjacency (&im->lookup_main, t->adj_index); s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", - t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - s = format (s, "\n%U%U", - format_white_space, indent, - format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, - t->packet_data, sizeof (t->packet_data)); - break; + t->fib_index, t->adj_index, format_ip_adjacency, + vnm, &im->lookup_main, t->adj_index, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip4_header, t->packet_data); + return s; +} - default: - break; - } 
+static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + uword indent = format_get_indent (s); + s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", + t->fib_index, t->adj_index, format_ip_adjacency, + vnm, &im->lookup_main, t->adj_index, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, &im->lookup_main, t->adj_index, + t->packet_data, sizeof (t->packet_data)); return s; } @@ -1448,8 +1717,11 @@ ip4_forward_next_trace (vlib_main_t * vm, t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; - t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer(b0)->sw_if_index[VLIB_RX]); + t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ? + vnet_buffer(b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0), sizeof (t0->packet_data)); @@ -1459,8 +1731,10 @@ ip4_forward_next_trace (vlib_main_t * vm, t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; - t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer(b1)->sw_if_index[VLIB_RX]); + t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ? 
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b1)->sw_if_index[VLIB_RX]); clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1), sizeof (t1->packet_data)); @@ -1484,8 +1758,10 @@ ip4_forward_next_trace (vlib_main_t * vm, t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; - t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer(b0)->sw_if_index[VLIB_RX]); + t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ? + vnet_buffer(b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0), sizeof (t0->packet_data)); @@ -1549,6 +1825,8 @@ VLIB_REGISTER_NODE (ip4_drop_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop) + VLIB_REGISTER_NODE (ip4_punt_node,static) = { .function = ip4_punt, .name = "ip4-punt", @@ -1562,6 +1840,8 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt) + VLIB_REGISTER_NODE (ip4_miss_node,static) = { .function = ip4_miss, .name = "ip4-miss", @@ -1575,6 +1855,8 @@ VLIB_REGISTER_NODE (ip4_miss_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss) + /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, @@ -1710,8 +1992,10 @@ ip4_local (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); - proto0 = ip0->protocol; - proto1 = ip1->protocol; + /* Treat IP frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol; + proto1 = ip4_is_fragment(ip1) ? 
0xfe : ip1->protocol; is_udp0 = proto0 == IP_PROTOCOL_UDP; is_udp1 = proto1 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1903,7 +2187,9 @@ ip4_local (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - proto0 = ip0->protocol; + /* Treat IP frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol; is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -2013,6 +2299,8 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = { }, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local) + void ip4_register_protocol (u32 protocol, u32 node_index) { vlib_main_t * vm = vlib_get_main(); @@ -2200,7 +2488,12 @@ ip4_arp (vlib_main_t * vm, clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, sizeof (h0->ip4_over_ethernet[0].ethernet)); - ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0); + if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) { + //No source address available + p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; + vlib_buffer_free(vm, &bi0, 1); + continue; + } /* Copy in destination address we are requesting. 
*/ h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; @@ -2227,6 +2520,7 @@ static char * ip4_arp_error_strings[] = { [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies", [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed", [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed", + [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request", }; VLIB_REGISTER_NODE (ip4_arp_node) = { @@ -2336,6 +2630,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) typedef enum { IP4_REWRITE_NEXT_DROP, IP4_REWRITE_NEXT_ARP, + IP4_REWRITE_NEXT_ICMP_ERROR, } ip4_rewrite_next_t; always_inline uword @@ -2405,6 +2700,7 @@ ip4_rewrite_inline (vlib_main_t * vm, ip1 = vlib_buffer_get_current (p1); error0 = error1 = IP4_ERROR_NONE; + next0 = next1 = IP4_REWRITE_NEXT_DROP; /* Decrement TTL & update checksum. Works either endian, so no need for byte swap. */ @@ -2431,8 +2727,26 @@ ip4_rewrite_inline (vlib_main_t * vm, ip0->ttl = ttl0; ip1->ttl = ttl1; - error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0; - error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1; + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. + */ + if (PREDICT_FALSE(ttl0 <= 0)) + { + error0 = IP4_ERROR_TIME_EXPIRED; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0; + icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + next0 = IP4_REWRITE_NEXT_ICMP_ERROR; + } + if (PREDICT_FALSE(ttl1 <= 0)) + { + error1 = IP4_ERROR_TIME_EXPIRED; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0; + icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + next1 = IP4_REWRITE_NEXT_ICMP_ERROR; + } /* Verify checksum. */ ASSERT (ip0->checksum == ip4_header_checksum (ip0)); @@ -2468,14 +2782,23 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Worth pipelining. No guarantee that adj0,1 are hot... 
*/ rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; - next0 = (error0 == IP4_ERROR_NONE) - ? adj0[0].rewrite_header.next_index : 0; + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : error0); + error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : error1); + + next0 = (error0 == IP4_ERROR_NONE) + ? adj0[0].rewrite_header.next_index : next0; if (rewrite_for_locally_received_packets) next0 = next0 && next0_override ? next0_override : next0; - next1 = (error1 == IP4_ERROR_NONE) - ? adj1[0].rewrite_header.next_index : 0; + next1 = (error1 == IP4_ERROR_NONE) + ? adj1[0].rewrite_header.next_index : next1; if (rewrite_for_locally_received_packets) next1 = next1 && next1_override ? next1_override : next1; @@ -2497,25 +2820,24 @@ ip4_rewrite_inline (vlib_main_t * vm, /* packet increment */ 0, /* byte increment */ rw_len1-sizeof(ethernet_header_t)); - /* Check MTU of outgoing interface. */ - error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes - ? IP4_ERROR_MTU_EXCEEDED - : error0); - error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes - ? 
IP4_ERROR_MTU_EXCEEDED - : error1); - - p0->current_data -= rw_len0; - p1->current_data -= rw_len1; - - p0->current_length += rw_len0; - p1->current_length += rw_len1; - - vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index; - vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index; - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + if (PREDICT_TRUE(error0 == IP4_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + p0->error = error_node->errors[error0]; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + } + if (PREDICT_TRUE(error1 == IP4_ERROR_NONE)) + { + p1->current_data -= rw_len1; + p1->current_length += rw_len1; + p1->error = error_node->errors[error1]; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = + adj1[0].rewrite_header.sw_if_index; + } /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_two_headers (adj0[0], adj1[0], @@ -2552,7 +2874,7 @@ ip4_rewrite_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); error0 = IP4_ERROR_NONE; - next0 = 0; /* drop on error */ + next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */ /* Decrement TTL & update checksum. */ if (! rewrite_for_locally_received_packets) @@ -2573,7 +2895,18 @@ ip4_rewrite_inline (vlib_main_t * vm, ASSERT (ip0->checksum == ip4_header_checksum (ip0)); - error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0; + if (PREDICT_FALSE(ttl0 <= 0)) + { + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. 
+ */ + error0 = IP4_ERROR_TIME_EXPIRED; + next0 = IP4_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0; + icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + } } if (rewrite_for_locally_received_packets) @@ -2615,15 +2948,20 @@ ip4_rewrite_inline (vlib_main_t * vm, > adj0[0].rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : error0); - + p0->error = error_node->errors[error0]; - p0->current_data -= rw_len0; - p0->current_length += rw_len0; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = - adj0[0].rewrite_header.sw_if_index; - - next0 = (error0 == IP4_ERROR_NONE) - ? adj0[0].rewrite_header.next_index : 0; + + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + if (PREDICT_TRUE(error0 == IP4_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + } if (rewrite_for_locally_received_packets) next0 = next0 && next0_override ? next0_override : next0; @@ -2648,6 +2986,38 @@ ip4_rewrite_inline (vlib_main_t * vm, return frame->n_vectors; } + +/** @brief IPv4 transit rewrite node. + @node ip4-rewrite-transit + + This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4 + header checksum, fetch the ip adjacency, check the outbound mtu, + apply the adjacency rewrite, and send pkts to the adjacency + rewrite header's rewrite_next_index. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - vnet_buffer(b)->ip.adj_index[VLIB_TX] + - the rewrite adjacency index + - adj->lookup_next_index + - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise + the packet will be dropped. 
+ - adj->rewrite_header + - Rewrite string length, rewrite string, next_index + + @em Sets: + - b->current_data, b->current_length + - Updated net of applying the rewrite string + + Next Indices: + - adj->rewrite_header.next_index + or @c error-drop +*/ static uword ip4_rewrite_transit (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2657,6 +3027,39 @@ ip4_rewrite_transit (vlib_main_t * vm, /* rewrite_for_locally_received_packets */ 0); } +/** @brief IPv4 local rewrite node. + @node ip4-rewrite-local + + This is the IPv4 local rewrite node. Fetch the ip adjacency, check + the outbound interface mtu, apply the adjacency rewrite, and send + pkts to the adjacency rewrite header's rewrite_next_index. Deal + with hemorrhoids of the form "some clown sends an icmp4 w/ src = + dst = interface addr." + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - vnet_buffer(b)->ip.adj_index[VLIB_RX] + - the rewrite adjacency index + - adj->lookup_next_index + - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise + the packet will be dropped. 
+ - adj->rewrite_header + - Rewrite string length, rewrite string, next_index + + @em Sets: + - b->current_data, b->current_length + - Updated net of applying the rewrite string + + Next Indices: + - adj->rewrite_header.next_index + or @c error-drop +*/ + static uword ip4_rewrite_local (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2671,31 +3074,32 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .name = "ip4-rewrite-transit", .vector_size = sizeof (u32), - .format_trace = format_ip4_forward_next_trace, + .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = 3, .next_nodes = { [IP4_REWRITE_NEXT_DROP] = "error-drop", [IP4_REWRITE_NEXT_ARP] = "ip4-arp", + [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; -VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = { +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit) + +VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { .function = ip4_rewrite_local, .name = "ip4-rewrite-local", .vector_size = sizeof (u32), .sibling_of = "ip4-rewrite-transit", - .format_trace = format_ip4_forward_next_trace, + .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 2, - .next_nodes = { - [IP4_REWRITE_NEXT_DROP] = "error-drop", - [IP4_REWRITE_NEXT_ARP] = "ip4-arp", - }, + .n_next_nodes = 0, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local) + static clib_error_t * add_del_interface_table (vlib_main_t * vm, unformat_input_t * input, @@ -2957,6 +3361,9 @@ ip4_lookup_multicast (vlib_main_t * vm, vlib_put_next_frame (vm, node, next, n_left_to_next); } + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace(vm, node, frame, VLIB_TX); + return frame->n_vectors; } @@ -2964,11 +3371,14 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = { .function = ip4_lookup_multicast, .name = "ip4-lookup-multicast", .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_ip4_lookup_trace, - .n_next_nodes = IP_LOOKUP_N_NEXT, - .next_nodes = 
IP4_LOOKUP_NEXT_NODES, + .n_next_nodes = 0, }; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast) + VLIB_REGISTER_NODE (ip4_multicast_node,static) = { .function = ip4_drop, .name = "ip4-multicast",