#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
-#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
+/** for ethernet_header_t */
+#include <vnet/ethernet/ethernet.h>
+/** for ethernet_arp_header_t */
+#include <vnet/ethernet/arp_packet.h>
#include <vnet/ppp/ppp.h>
-#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
-#include <vnet/api_errno.h> /* for API error numbers */
+/** for srp_hw_interface_class */
+#include <vnet/srp/srp.h>
+/** for API error numbers */
+#include <vnet/api_errno.h>
+
+/** @file
+ vnet ip4 forwarding
+*/
/* This is really, really simple but stupid fib. */
u32
goto done;
}
}
-
+
/* Nothing matches in table. */
ai = lm->miss_adj_index;
return ai;
}
+/** @brief Create FIB from table ID and init all hashing.
+ @param im - @ref ip4_main_t
+ @param table_id - table ID
+ @return fib - @ref ip4_fib_t
+*/
static ip4_fib_t *
create_fib_with_table_id (ip4_main_t * im, u32 table_id)
{
vec_add2 (im->fibs, fib, 1);
fib->table_id = table_id;
fib->index = fib - im->fibs;
+ /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
fib->fwd_classify_table_index = ~0;
fib->rev_classify_table_index = ~0;
return fib;
}
+/** @brief Find existing or Create new FIB based on index
+ @param im @ref ip4_main_t
+ @param table_index_or_id - overloaded parameter referring
+ to the table or a table's index in the FIB vector
+ @param flags - used to check if table_index_or_id was a table or
+ an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
+ @return either the existing or a new ip4_fib_t entry
+*/
ip4_fib_t *
-find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
+find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
				   u32 table_index_or_id, u32 flags)
{
  uword * p, fib_index;
  fib_index = table_index_or_id;
+  /* NOTE(review): this lookup has a side effect -- it may allocate a
+     brand new FIB when the requested table does not yet exist. */
+  /* If this isn't a FIB_INDEX ... */
  if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
    {
+      /* If passed ~0 then request the next table available */
      if (table_index_or_id == ~0) {
	table_index_or_id = 0;
	while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
	  table_index_or_id++;
	}
-	return create_fib_with_table_id (im, table_index_or_id);
+	/* Create the next table and return the ip4_fib_t associated with it */
+	return create_fib_with_table_id (im, table_index_or_id);
      }
-      
+      /* A specific table_id was requested.. */
      p = hash_get (im->fib_index_by_table_id, table_index_or_id);
+      /* ... and if it doesn't exist create it else grab its index */
      if (! p)
	return create_fib_with_table_id (im, table_index_or_id);
      fib_index = p[0];
    }
+  /* Return the ip4_fib_t associated with this index */
  return vec_elt_at_index (im->fibs, fib_index);
}
fib->new_hash_values);
p = hash_get (hash, dst_address_u32);
- clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
+ /* hash_get should never return NULL here */
+ if (p)
+ clib_memcpy (p, fib->new_hash_values,
+ vec_bytes (fib->new_hash_values));
+ else
+ ASSERT(0);
}
}
ip_del_adjacency (lm, old_adj_index);
}
+
+/** @brief Resolve the adjacency index for a route's next hop.
+
+    Three cases, keyed on @c next_hop and @c next_hop_sw_if_index:
+    - next hop 0.0.0.0: interface route; reuse or create the cached
+      per-sw_if_index interface adjacency.
+    - next_hop_sw_if_index == ~0: recursive path; always create an
+      indirect adjacency so the next hop can be tracked.
+    - otherwise: look for a /32 adjacency for the next hop; if absent,
+      fall back to an LPM lookup and, when that yields an unresolved
+      ARP interface adjacency, install a glean adjacency for this
+      specific next hop.
+
+    @param im - @ref ip4_main_t
+    @param fib_index - index of the FIB to resolve against
+    @param next_hop - next-hop IPv4 address (0.0.0.0 for interface routes)
+    @param next_hop_sw_if_index - sw_if_index of next hop, ~0 if recursive
+    @param explicit_fib_index - FIB index recorded in indirect adjacencies
+    @return adjacency index to use for the next hop
+*/
+u32
+ip4_route_get_next_hop_adj (ip4_main_t * im,
+			    u32 fib_index,
+			    ip4_address_t *next_hop,
+			    u32 next_hop_sw_if_index,
+			    u32 explicit_fib_index)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  vnet_main_t * vnm = vnet_get_main();
+  uword * nh_hash, * nh_result;
+  int is_interface_next_hop;
+  u32 nh_adj_index;
+  ip4_fib_t * fib;
+
+  fib = vec_elt_at_index (im->fibs, fib_index);
+
+  is_interface_next_hop = next_hop->data_u32 == 0;
+  if (is_interface_next_hop)
+    {
+      nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
+      if (nh_result)
+	nh_adj_index = *nh_result;
+      else
+	{
+	  ip_adjacency_t * adj;
+	  adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+				  &nh_adj_index);
+	  ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
+	  ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
+	  hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
+	}
+    }
+  else if (next_hop_sw_if_index == ~0)
+    {
+      /* next-hop is recursive. we always need an indirect adj
+       * for recursive paths. Any LPM we perform now will give
+       * us a valid adj, but without tracking the next-hop we
+       * have no way to keep it valid.
+       */
+      ip_adjacency_t add_adj;
+      memset (&add_adj, 0, sizeof(add_adj));
+      add_adj.n_adj = 1;
+      add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
+      add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
+      add_adj.explicit_fib_index = explicit_fib_index;
+      ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
+    }
+  else
+    {
+      nh_hash = fib->adj_index_by_dst_address[32];
+      nh_result = hash_get (nh_hash, next_hop->data_u32);
+
+      /* Next hop must be known. */
+      if (! nh_result)
+	{
+	  ip_adjacency_t * adj;
+
+	  /* no /32 exists, get the longest prefix match */
+	  nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
+						    next_hop, 0);
+	  adj = ip_get_adjacency (lm, nh_adj_index);
+	  /* if ARP interface adjacency is present, we need to
+	     install ARP adjacency for specific next hop */
+	  if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
+	      adj->arp.next_hop.ip4.as_u32 == 0)
+	    {
+	      nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
+	    }
+	}
+      else
+	{
+	  nh_adj_index = *nh_result;
+	}
+    }
+
+  return (nh_adj_index);
+}
+
void
ip4_add_del_route_next_hop (ip4_main_t * im,
u32 flags,
u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
u32 dst_adj_index, nh_adj_index;
uword * dst_hash, * dst_result;
- uword * nh_hash, * nh_result;
ip_adjacency_t * dst_adj;
ip_multipath_adjacency_t * old_mp, * new_mp;
int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
- int is_interface_next_hop;
clib_error_t * error = 0;
if (explicit_fib_index == (u32)~0)
fib_index = explicit_fib_index;
fib = vec_elt_at_index (im->fibs, fib_index);
-
+
/* Lookup next hop to be added or deleted. */
- is_interface_next_hop = next_hop->data_u32 == 0;
if (adj_index == (u32)~0)
{
- if (is_interface_next_hop)
- {
- nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
- if (nh_result)
- nh_adj_index = *nh_result;
- else
- {
- ip_adjacency_t * adj;
- adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
- &nh_adj_index);
- ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
- ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
- hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
- }
- }
- else
- {
- nh_hash = fib->adj_index_by_dst_address[32];
- nh_result = hash_get (nh_hash, next_hop->data_u32);
-
- /* Next hop must be known. */
- if (! nh_result)
- {
- ip_adjacency_t * adj;
-
- nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
- next_hop, 0);
- adj = ip_get_adjacency (lm, nh_adj_index);
- /* if ARP interface adjacencty is present, we need to
- install ARP adjaceny for specific next hop */
- if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
- adj->arp.next_hop.ip4.as_u32 == 0)
- {
- nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
- }
- else
- {
- /* Next hop is not known, so create indirect adj */
- ip_adjacency_t add_adj;
- add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
- add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
- add_adj.explicit_fib_index = explicit_fib_index;
- ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
- }
- }
- else
- nh_adj_index = *nh_result;
- }
+ nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
+ next_hop,
+ next_hop_sw_if_index,
+ explicit_fib_index);
}
else
{
to existing non-multipath adjacency */
if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
{
- /* create new adjacency */
+ /* create / delete additional mapping of existing adjacency */
ip4_add_del_route_args_t a;
+
a.table_index_or_table_id = fib_index;
a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
| IP4_ROUTE_FLAG_FIB_INDEX
a.n_add_adj = 0;
ip4_add_del_route (im, &a);
-
goto done;
}
if (old_mp != new_mp)
{
ip4_add_del_route_args_t a;
+ ip_adjacency_t * adj;
+
a.table_index_or_table_id = fib_index;
a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
| IP4_ROUTE_FLAG_FIB_INDEX
a.n_add_adj = 0;
ip4_add_del_route (im, &a);
+
+ adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
+ if (adj->n_adj == 1)
+ adj->share_count += is_del ? -1 : 1;
}
done:
ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
}
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
always_inline uword
ip4_lookup_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_put_next_frame (vm, node, next, n_left_to_next);
}
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace(vm, node, frame, VLIB_TX);
+
return frame->n_vectors;
}
+/** @brief IPv4 lookup node.
+ @node ip4-lookup
+
+ This is the main IPv4 lookup dispatch node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ - When the value is @c ~0 then the node performs a longest prefix
+ match (LPM) for the packet destination address in the FIB attached
+ to the receive interface.
+ - Otherwise perform LPM for the packet destination address in the
+ indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
+ value (0, 1, ...) and not a VRF id.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the node index found in
+ ip_adjacency_t @c adj->lookup_next_index
+ (where @c adj is the lookup result adjacency).
+*/
static uword
ip4_lookup (vlib_main_t * vm,
vlib_node_runtime_t * node,
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
+/* Built-in ip4 unicast rx feature path definition */
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
+ .node_name = "ip4-inacl",
+ .runs_before = {"ip4-source-check-via-rx", 0},
+ .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
+ .node_name = "ip4-source-check-via-rx",
+ .runs_before = {"ip4-source-check-via-any", 0},
+ .feature_index =
+ &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
+ .node_name = "ip4-source-check-via-any",
+ .runs_before = {"ip4-policer-classify", 0},
+ .feature_index =
+ &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check, static) = {
+ .node_name = "ip4-source-and-port-range-check",
+ .runs_before = {"ip4-policer-classify", 0},
+ .feature_index =
+ &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
+ .node_name = "ip4-policer-classify",
+ .runs_before = {"ipsec-input-ip4", 0},
+ .feature_index =
+ &ip4_main.ip4_unicast_rx_feature_policer_classify,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
+ .node_name = "ipsec-input-ip4",
+ .runs_before = {"vpath-input-ip4", 0},
+ .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
+ .node_name = "vpath-input-ip4",
+ .runs_before = {"ip4-lookup", 0},
+ .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
+ .node_name = "ip4-lookup",
+ .runs_before = {0}, /* not before any other features */
+ .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
+};
+
+/* Built-in ip4 multicast rx feature path definition */
+VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
+ .node_name = "vpath-input-ip4",
+ .runs_before = {"ip4-lookup-multicast", 0},
+ .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
+};
+
+VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
+ .node_name = "ip4-lookup-multicast",
+ .runs_before = {0}, /* not before any other features */
+ .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
+};
+
+static char * feature_start_nodes[] =
+ { "ip4-input", "ip4-input-no-checksum"};
+
+/** @brief Initialize the ip4 rx feature subgraph orderings.
+
+    For each cast (unicast and multicast) initialize the per-cast
+    feature configuration, starting the feature arcs at the nodes
+    listed in @c feature_start_nodes.
+
+    @param vm - vlib_main_t
+    @param im - @ref ip4_main_t
+    @return 0 on success, else the clib_error_t from ip_feature_init_cast
+*/
+static clib_error_t *
+ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  clib_error_t * error;
+  vnet_cast_t cast;
+
+  for (cast = 0; cast < VNET_N_CAST; cast++)
+    {
+      ip_config_main_t * cm = &lm->rx_config_mains[cast];
+      vnet_config_main_t * vcm = &cm->config_main;
+
+      if ((error = ip_feature_init_cast (vm, cm, vcm,
+					 feature_start_nodes,
+					 ARRAY_LEN(feature_start_nodes),
+					 cast,
+					 1 /* is_ip4 */)))
+	return error;
+    }
+  return 0;
+}
+
static clib_error_t *
ip4_sw_interface_add_del (vnet_main_t * vnm,
u32 sw_if_index,
ip4_main_t * im = &ip4_main;
ip_lookup_main_t * lm = &im->lookup_main;
u32 ci, cast;
+ u32 feature_index;
for (cast = 0; cast < VNET_N_CAST; cast++)
{
ip_config_main_t * cm = &lm->rx_config_mains[cast];
vnet_config_main_t * vcm = &cm->config_main;
- if (! vcm->node_index_by_feature_index)
- {
- if (cast == VNET_UNICAST)
- {
- static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
- static char * feature_nodes[] = {
- [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
- [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
- [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
- [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
- [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
- [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
- };
-
- vnet_config_init (vm, vcm,
- start_nodes, ARRAY_LEN (start_nodes),
- feature_nodes, ARRAY_LEN (feature_nodes));
- }
- else
- {
- static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
- static char * feature_nodes[] = {
- [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
- [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
- };
-
- vnet_config_init (vm, vcm,
- start_nodes, ARRAY_LEN (start_nodes),
- feature_nodes, ARRAY_LEN (feature_nodes));
- }
- }
-
vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
ci = cm->config_index_by_sw_if_index[sw_if_index];
+ if (cast == VNET_UNICAST)
+ feature_index = im->ip4_unicast_rx_feature_lookup;
+ else
+ feature_index = im->ip4_multicast_rx_feature_lookup;
+
if (is_add)
ci = vnet_config_add_feature (vm, vcm,
ci,
- IP4_RX_FEATURE_LOOKUP,
+ feature_index,
/* config data */ 0,
/* # bytes of config data */ 0);
else
ci = vnet_config_del_feature (vm, vcm,
ci,
- IP4_RX_FEATURE_LOOKUP,
+ feature_index,
/* config data */ 0,
/* # bytes of config data */ 0);
VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
+static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
VLIB_REGISTER_NODE (ip4_lookup_node) = {
.function = ip4_lookup,
.name = "ip4-lookup",
.vector_size = sizeof (u32),
- .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .format_trace = format_ip4_lookup_trace,
+
+ .n_next_nodes = IP4_LOOKUP_N_NEXT,
.next_nodes = IP4_LOOKUP_NEXT_NODES,
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
+
static uword
ip4_indirect (vlib_main_t * vm,
vlib_node_runtime_t * node,
.function = ip4_indirect,
.name = "ip4-indirect",
.vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_ip4_lookup_trace,
- .n_next_nodes = IP_LOOKUP_N_NEXT,
- .next_nodes = IP4_LOOKUP_NEXT_NODES,
+ .n_next_nodes = 0,
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect)
+
/* Global IP4 main. */
ip4_main_t ip4_main;
ip4_lookup_init (vlib_main_t * vm)
{
ip4_main_t * im = &ip4_main;
+ clib_error_t * error;
uword i;
for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
"ip4 arp");
}
- return 0;
+ error = ip4_feature_init (vm, im);
+
+ return error;
}
VLIB_INIT_FUNCTION (ip4_lookup_init);
} ip4_forward_next_trace_t;
static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+  /* Base trace formatter: print just the captured ip4 header,
+     indented to line up with the enclosing trace output. */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
+  uword indent = format_get_indent (s);
+  s = format (s, "%U%U",
+	      format_white_space, indent,
+	      format_ip4_header, t->packet_data);
+  return s;
+}
+
+static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
vnet_main_t * vnm = vnet_get_main();
ip4_main_t * im = &ip4_main;
- ip_adjacency_t * adj;
uword indent = format_get_indent (s);
- adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
- t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, &im->lookup_main, t->adj_index, t->flow_hash);
- switch (adj->lookup_next_index)
- {
- case IP_LOOKUP_NEXT_REWRITE:
- s = format (s, "\n%U%U",
- format_white_space, indent,
- format_ip_adjacency_packet_data,
- vnm, &im->lookup_main, t->adj_index,
- t->packet_data, sizeof (t->packet_data));
- break;
+ t->fib_index, t->adj_index, format_ip_adjacency,
+ vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip4_header, t->packet_data);
+ return s;
+}
- default:
- break;
- }
+/** Trace formatter for the ip4 rewrite nodes: prints the adjacency,
+    flow hash, and the post-rewrite packet data.
+    NOTE(review): the value printed under the "tx_sw_if_index" label is
+    t->fib_index -- presumably the rewrite path reuses that trace field
+    for the tx sw_if_index; confirm against the trace-fill code. */
+static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  uword indent = format_get_indent (s);
+  s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
+              t->fib_index, t->adj_index, format_ip_adjacency,
+              vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+  s = format (s, "\n%U%U",
+              format_white_space, indent,
+              format_ip_adjacency_packet_data,
+              vnm, &im->lookup_main, t->adj_index,
+              t->packet_data, sizeof (t->packet_data));
+  return s;
+}
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
- t0->fib_index = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
clib_memcpy (t0->packet_data,
vlib_buffer_get_current (b0),
sizeof (t0->packet_data));
t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
- t1->fib_index = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
clib_memcpy (t1->packet_data,
vlib_buffer_get_current (b1),
sizeof (t1->packet_data));
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
- t0->fib_index = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
clib_memcpy (t0->packet_data,
vlib_buffer_get_current (b0),
sizeof (t0->packet_data));
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
+
VLIB_REGISTER_NODE (ip4_punt_node,static) = {
.function = ip4_punt,
.name = "ip4-punt",
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
+
VLIB_REGISTER_NODE (ip4_miss_node,static) = {
.function = ip4_miss,
.name = "ip4-miss",
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss)
+
/* Compute TCP/UDP/ICMP4 checksum in software. */
u16
ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
- proto0 = ip0->protocol;
- proto1 = ip1->protocol;
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
+ proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
is_udp0 = proto0 == IP_PROTOCOL_UDP;
is_udp1 = proto1 == IP_PROTOCOL_UDP;
is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
- proto0 = ip0->protocol;
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
is_udp0 = proto0 == IP_PROTOCOL_UDP;
is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
+
void ip4_register_protocol (u32 protocol, u32 node_index)
{
vlib_main_t * vm = vlib_get_main();
clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
sizeof (h0->ip4_over_ethernet[0].ethernet));
- ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
+ if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
+ //No source address available
+ p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free(vm, &bi0, 1);
+ continue;
+ }
/* Copy in destination address we are requesting. */
h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
[IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
[IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
[IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+ [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
VLIB_REGISTER_NODE (ip4_arp_node) = {
typedef enum {
IP4_REWRITE_NEXT_DROP,
IP4_REWRITE_NEXT_ARP,
+ IP4_REWRITE_NEXT_ICMP_ERROR,
} ip4_rewrite_next_t;
always_inline uword
ip1 = vlib_buffer_get_current (p1);
error0 = error1 = IP4_ERROR_NONE;
+ next0 = next1 = IP4_REWRITE_NEXT_DROP;
/* Decrement TTL & update checksum.
Works either endian, so no need for byte swap. */
ip0->ttl = ttl0;
ip1->ttl = ttl1;
- error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
- error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE(ttl0 <= 0))
+ {
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
+ icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+ if (PREDICT_FALSE(ttl1 <= 0))
+ {
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
+ icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+ next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
/* Verify checksum. */
ASSERT (ip0->checksum == ip4_header_checksum (ip0));
/* Worth pipelining. No guarantee that adj0,1 are hot... */
rw_len0 = adj0[0].rewrite_header.data_bytes;
rw_len1 = adj1[0].rewrite_header.data_bytes;
- next0 = (error0 == IP4_ERROR_NONE)
- ? adj0[0].rewrite_header.next_index : 0;
+
+ /* Check MTU of outgoing interface. */
+ error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
+ ? IP4_ERROR_MTU_EXCEEDED
+ : error0);
+ error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
+ ? IP4_ERROR_MTU_EXCEEDED
+ : error1);
+
+ next0 = (error0 == IP4_ERROR_NONE)
+ ? adj0[0].rewrite_header.next_index : next0;
if (rewrite_for_locally_received_packets)
next0 = next0 && next0_override ? next0_override : next0;
- next1 = (error1 == IP4_ERROR_NONE)
- ? adj1[0].rewrite_header.next_index : 0;
+ next1 = (error1 == IP4_ERROR_NONE)
+ ? adj1[0].rewrite_header.next_index : next1;
if (rewrite_for_locally_received_packets)
next1 = next1 && next1_override ? next1_override : next1;
/* packet increment */ 0,
/* byte increment */ rw_len1-sizeof(ethernet_header_t));
- /* Check MTU of outgoing interface. */
- error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
- ? IP4_ERROR_MTU_EXCEEDED
- : error0);
- error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
- ? IP4_ERROR_MTU_EXCEEDED
- : error1);
-
- p0->current_data -= rw_len0;
- p1->current_data -= rw_len1;
-
- p0->current_length += rw_len0;
- p1->current_length += rw_len1;
-
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
-
- p0->error = error_node->errors[error0];
- p1->error = error_node->errors[error1];
+	  /* Don't adjust the buffer for the ttl issue; the icmp-error node
+	   * wants to see the IP header */
+ if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ p0->error = error_node->errors[error0];
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ }
+ if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
+ {
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+ p1->error = error_node->errors[error1];
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+ adj1[0].rewrite_header.sw_if_index;
+ }
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0 = vlib_buffer_get_current (p0);
error0 = IP4_ERROR_NONE;
- next0 = 0; /* drop on error */
+ next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
/* Decrement TTL & update checksum. */
if (! rewrite_for_locally_received_packets)
ASSERT (ip0->checksum == ip4_header_checksum (ip0));
- error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
+ if (PREDICT_FALSE(ttl0 <= 0))
+ {
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
+ icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+ }
}
if (rewrite_for_locally_received_packets)
> adj0[0].rewrite_header.max_l3_packet_bytes
? IP4_ERROR_MTU_EXCEEDED
: error0);
-
+
p0->error = error_node->errors[error0];
- p0->current_data -= rw_len0;
- p0->current_length += rw_len0;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] =
- adj0[0].rewrite_header.sw_if_index;
-
- next0 = (error0 == IP4_ERROR_NONE)
- ? adj0[0].rewrite_header.next_index : 0;
+
+      /* Don't adjust the buffer for the ttl issue; the icmp-error node
+       * wants to see the IP header */
+ if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ next0 = adj0[0].rewrite_header.next_index;
+ }
if (rewrite_for_locally_received_packets)
next0 = next0 && next0_override ? next0_override : next0;
return frame->n_vectors;
}
+
+/** @brief IPv4 transit rewrite node.
+ @node ip4-rewrite-transit
+
+ This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
+ header checksum, fetch the ip adjacency, check the outbound mtu,
+ apply the adjacency rewrite, and send pkts to the adjacency
+ rewrite header's rewrite_next_index.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - the rewrite adjacency index
+ - <code>adj->lookup_next_index</code>
+ - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+ the packet will be dropped.
+ - <code>adj->rewrite_header</code>
+ - Rewrite string length, rewrite string, next_index
+
+ @em Sets:
+ - <code>b->current_data, b->current_length</code>
+ - Updated net of applying the rewrite string
+
+ <em>Next Indices:</em>
+ - <code> adj->rewrite_header.next_index </code>
+ or @c error-drop
+*/
static uword
ip4_rewrite_transit (vlib_main_t * vm,
vlib_node_runtime_t * node,
/* rewrite_for_locally_received_packets */ 0);
}
+/** @brief IPv4 local rewrite node.
+ @node ip4-rewrite-local
+
+ This is the IPv4 local rewrite node. Fetch the ip adjacency, check
+ the outbound interface mtu, apply the adjacency rewrite, and send
+ pkts to the adjacency rewrite header's rewrite_next_index. Deal
+ with hemorrhoids of the form "some clown sends an icmp4 w/ src =
+ dst = interface addr."
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
+ - the rewrite adjacency index
+ - <code>adj->lookup_next_index</code>
+ - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+ the packet will be dropped.
+ - <code>adj->rewrite_header</code>
+ - Rewrite string length, rewrite string, next_index
+
+ @em Sets:
+ - <code>b->current_data, b->current_length</code>
+ - Updated net of applying the rewrite string
+
+ <em>Next Indices:</em>
+ - <code> adj->rewrite_header.next_index </code>
+ or @c error-drop
+*/
+
static uword
ip4_rewrite_local (vlib_main_t * vm,
vlib_node_runtime_t * node,
.name = "ip4-rewrite-transit",
.vector_size = sizeof (u32),
- .format_trace = format_ip4_forward_next_trace,
+ .format_trace = format_ip4_rewrite_trace,
- .n_next_nodes = 2,
+ .n_next_nodes = 3,
.next_nodes = {
[IP4_REWRITE_NEXT_DROP] = "error-drop",
[IP4_REWRITE_NEXT_ARP] = "ip4-arp",
+ [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
+
+VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
.function = ip4_rewrite_local,
.name = "ip4-rewrite-local",
.vector_size = sizeof (u32),
.sibling_of = "ip4-rewrite-transit",
- .format_trace = format_ip4_forward_next_trace,
+ .format_trace = format_ip4_rewrite_trace,
- .n_next_nodes = 2,
- .next_nodes = {
- [IP4_REWRITE_NEXT_DROP] = "error-drop",
- [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
- },
+ .n_next_nodes = 0,
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
+
static clib_error_t *
add_del_interface_table (vlib_main_t * vm,
unformat_input_t * input,
vlib_put_next_frame (vm, node, next, n_left_to_next);
}
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace(vm, node, frame, VLIB_TX);
+
return frame->n_vectors;
}
.function = ip4_lookup_multicast,
.name = "ip4-lookup-multicast",
.vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_ip4_lookup_trace,
- .n_next_nodes = IP_LOOKUP_N_NEXT,
- .next_nodes = IP4_LOOKUP_NEXT_NODES,
+ .n_next_nodes = 0,
};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
+
VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
.function = ip4_drop,
.name = "ip4-multicast",