#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
#include <vnet/ip/ip4_forward.h>
+#include <vnet/interface_output.h>
/** @brief IPv4 lookup node.
@node ip4-lookup
VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_lookup_inline (vm, node, frame,
- /* lookup_for_responses_to_locally_received_packets */
- 0);
-
+ return ip4_lookup_inline (vm, node, frame); /* the node body is fully delegated
+ * to the inline helper, which is now called with (vm, node, frame)
+ * only and whose result is returned unchanged */
}
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
vlib_frame_t * frame)
{
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
- u32 n_left_from, n_left_to_next, *from, *to_next;
- ip_lookup_next_t next;
+ u32 n_left, *from;
u32 thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next = node->cached_next_index;
+ n_left = frame->n_vectors;
+ next = nexts;
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+ vlib_get_buffers (vm, from, bufs, n_left);
- while (n_left_from > 0)
+ while (n_left >= 4)
{
- vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
-
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- ip_lookup_next_t next0, next1;
- const load_balance_t *lb0, *lb1;
- vlib_buffer_t *p0, *p1;
- u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
- const ip4_header_t *ip0, *ip1;
- const dpo_id_t *dpo0, *dpo1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
+ const load_balance_t *lb0, *lb1;
+ const ip4_header_t *ip0, *ip1;
+ u32 lbi0, hc0, lbi1, hc1;
+ const dpo_id_t *dpo0, *dpo1;
- CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
- }
-
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
+ /* Prefetch next iteration. */
+ {
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
- p0 = vlib_get_buffer (vm, pi0);
- p1 = vlib_get_buffer (vm, pi1);
+ CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
+ }
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
- lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ ip0 = vlib_buffer_get_current (b[0]);
+ ip1 = vlib_buffer_get_current (b[1]);
+ lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
- lb0 = load_balance_get (lbi0);
- lb1 = load_balance_get (lbi1);
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
- /*
- * this node is for via FIBs we can re-use the hash value from the
- * to node if present.
- * We don't want to use the same hash value at each level in the recursion
- * graph as that would lead to polarisation
- */
- hc0 = hc1 = 0;
+ /*
+ * this node is for via FIBs we can re-use the hash value from the
+ * to node if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation
+ */
+ hc0 = hc1 = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
{
- if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer (p0)->ip.flow_hash =
- vnet_buffer (p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer (p0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
- }
- dpo0 = load_balance_get_fwd_bucket
- (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ hc0 = vnet_buffer (b[0])->ip.flow_hash =
+ vnet_buffer (b[0])->ip.flow_hash >> 1;
}
else
{
- dpo0 = load_balance_get_bucket_i (lb0, 0);
+ hc0 = vnet_buffer (b[0])->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
}
- if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
{
- if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
- {
- hc1 = vnet_buffer (p1)->ip.flow_hash =
- vnet_buffer (p1)->ip.flow_hash >> 1;
- }
- else
- {
- hc1 = vnet_buffer (p1)->ip.flow_hash =
- ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
- }
- dpo1 = load_balance_get_fwd_bucket
- (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+ hc1 = vnet_buffer (b[1])->ip.flow_hash =
+ vnet_buffer (b[1])->ip.flow_hash >> 1;
}
else
{
- dpo1 = load_balance_get_bucket_i (lb1, 0);
+ hc1 = vnet_buffer (b[1])->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
}
+ dpo1 = load_balance_get_fwd_bucket
+ (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
- next0 = dpo0->dpoi_next_node;
- next1 = dpo1->dpoi_next_node;
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
- vlib_increment_combined_counter
- (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+ next[0] = dpo0->dpoi_next_node;
+ next[1] = dpo1->dpoi_next_node;
- vlib_validate_buffer_enqueue_x2 (vm, node, next,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
- }
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- ip_lookup_next_t next0;
- const load_balance_t *lb0;
- vlib_buffer_t *p0;
- u32 pi0, lbi0, hc0;
- const ip4_header_t *ip0;
- const dpo_id_t *dpo0;
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
- pi0 = from[0];
- to_next[0] = pi0;
- from += 1;
- to_next += 1;
- n_left_to_next -= 1;
- n_left_from -= 1;
+ b += 2;
+ next += 2;
+ n_left -= 2;
+ }
- p0 = vlib_get_buffer (vm, pi0);
+ while (n_left > 0)
+ {
+ const load_balance_t *lb0;
+ const ip4_header_t *ip0;
+ const dpo_id_t *dpo0;
+ u32 lbi0, hc0;
- ip0 = vlib_buffer_get_current (p0);
- lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ ip0 = vlib_buffer_get_current (b[0]);
+ lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
- lb0 = load_balance_get (lbi0);
+ lb0 = load_balance_get (lbi0);
- hc0 = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
{
- if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer (p0)->ip.flow_hash =
- vnet_buffer (p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer (p0)->ip.flow_hash =
- ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
- }
- dpo0 = load_balance_get_fwd_bucket
- (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ hc0 = vnet_buffer (b[0])->ip.flow_hash =
+ vnet_buffer (b[0])->ip.flow_hash >> 1;
}
else
{
- dpo0 = load_balance_get_bucket_i (lb0, 0);
+ hc0 = vnet_buffer (b[0])->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
}
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
- next0 = dpo0->dpoi_next_node;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ next[0] = dpo0->dpoi_next_node;
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
- vlib_put_next_frame (vm, node, next, n_left_to_next);
+ b += 1;
+ next += 1;
+ n_left -= 1;
}
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
return frame->n_vectors;
}
*result_ia = result ? ia : 0;
return result;
}
+#endif
static void
ip4_add_subnet_bcast_route (u32 fib_index,
}
static void
-ip4_add_interface_routes (u32 sw_if_index,
- ip4_main_t * im, u32 fib_index,
- ip_interface_address_t * a)
+ip4_add_interface_prefix_routes (ip4_main_t *im,
+ u32 sw_if_index,
+ u32 fib_index,
+ ip_interface_address_t * a)
{
ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_prefix_t *if_prefix;
ip4_address_t *address = ip_interface_address_get_address (lm, a);
- fib_prefix_t pfx = {
- .fp_len = a->address_length,
+ /*
+ * Install the per-prefix ("special") routes for interface address 'a' in
+ * fib_index: for lengths <= 30 a glean for the prefix plus /32 entries
+ * for the prefix base and broadcast addresses, for a /31 an attached /32
+ * for the other address; longer lengths add nothing here.
+ *
+ * Prefixes are reference counted per (masked prefix, sw_if_index): only
+ * the first address of a prefix on an interface installs routes, later
+ * ones just increment the count.
+ */
+ ip_interface_prefix_key_t key = {
+ .prefix = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
+ },
+ .sw_if_index = sw_if_index,
+ };
+ /* the key holds the masked address, so every address of the same prefix
+ * on this interface resolves to the same entry */
+ fib_prefix_t pfx_special = {
.fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr.ip4 = *address,
};
- if (pfx.fp_len <= 30)
+ /* If prefix already set on interface, just increment ref count & return */
+ if_prefix = ip_get_interface_prefix (lm, &key);
+ if (if_prefix)
{
- /* a /30 or shorter - add a glean for the network address */
- fib_table_entry_update_one_path (fib_index, &pfx,
- FIB_SOURCE_INTERFACE,
- (FIB_ENTRY_FLAG_CONNECTED |
- FIB_ENTRY_FLAG_ATTACHED),
- DPO_PROTO_IP4,
- /* No next-hop address */
- NULL,
- sw_if_index,
- // invalid FIB index
+ if_prefix->ref_count += 1;
+ return;
+ }
+
+ /* New prefix - allocate a pool entry, initialize it, add to the hash */
+ pool_get (lm->if_prefix_pool, if_prefix);
+ if_prefix->ref_count = 1;
+ if_prefix->src_ia_index = a - lm->if_address_pool;
+ clib_memcpy (&if_prefix->key, &key, sizeof (key));
+ mhash_set (&lm->prefix_to_if_prefix_index, &key,
+ if_prefix - lm->if_prefix_pool, 0 /* old value */);
+ /* src_ia_index remembers which interface address (pool index of 'a') the
+ * routes below are built from */
+ /* length <= 30 - add glean, drop first address, maybe drop bcast address */
+ if (a->address_length <= 30)
+ {
+ pfx_special.fp_len = a->address_length;
+ pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
+
+ /* set the glean route for the prefix */
+ fib_table_entry_update_one_path (fib_index, &pfx_special,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ /* No next-hop address */
+ NULL,
+ sw_if_index,
+ /* invalid FIB index */
~0,
1,
- // no out-label stack
+ /* no out-label stack */
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
- /* Add the two broadcast addresses as drop */
- fib_prefix_t net_pfx = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
- };
- if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
- fib_table_entry_special_add(fib_index,
- &net_pfx,
- FIB_SOURCE_INTERFACE,
- (FIB_ENTRY_FLAG_DROP |
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
- net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
- if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
- ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
- }
- else if (pfx.fp_len == 31)
- {
- u32 mask = clib_host_to_net_u32(1);
- fib_prefix_t net_pfx = pfx;
+ /* set a drop route for the base address of the prefix, unless the
+ * interface address itself is that base address */
+ pfx_special.fp_len = 32;
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 & im->fib_masks[a->address_length];
- net_pfx.fp_len = 32;
- net_pfx.fp_addr.ip4.as_u32 ^= mask;
+ if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
+ fib_table_entry_special_add (fib_index, &pfx_special,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_DROP |
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
- /* a /31 - add the other end as an attached host */
- fib_table_entry_update_one_path (fib_index, &net_pfx,
- FIB_SOURCE_INTERFACE,
- (FIB_ENTRY_FLAG_ATTACHED),
- DPO_PROTO_IP4,
- &net_pfx.fp_addr,
- sw_if_index,
- // invalid FIB index
+ /* set a route for the broadcast address of the prefix, unless the
+ * interface address itself is that broadcast address */
+ pfx_special.fp_len = 32;
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 | ~im->fib_masks[a->address_length];
+ if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
+ ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
+
+
+ }
+ /* length == 31 - add an attached route for the other address */
+ else if (a->address_length == 31)
+ {
+ pfx_special.fp_len = 32;
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 ^ clib_host_to_net_u32(1);
+ /* flipping the lowest address bit yields the other address of the /31;
+ * it is also used as the next hop of the route */
+ fib_table_entry_update_one_path (fib_index, &pfx_special,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ &pfx_special.fp_addr,
+ sw_if_index,
+ /* invalid FIB index */
~0,
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
}
- pfx.fp_len = 32;
+}
+
+static void
+ip4_add_interface_routes (u32 sw_if_index,
+ ip4_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip4_address_t *address = ip_interface_address_get_address (lm, a);
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ /* set special routes for the prefix if needed */
+ ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
{
}
static void
-ip4_del_interface_routes (ip4_main_t * im,
- u32 fib_index,
- ip4_address_t * address, u32 address_length)
+ip4_del_interface_prefix_routes (ip4_main_t * im,
+ u32 sw_if_index,
+ u32 fib_index,
+ ip4_address_t * address,
+ u32 address_length)
{
- fib_prefix_t pfx = {
- .fp_len = address_length,
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_prefix_t *if_prefix;
+ /*
+ * Drop one reference on the interface prefix that 'address' belongs to
+ * and adjust the per-prefix routes in fib_index:
+ * - nothing changes while other addresses remain and the address the
+ * glean was built from (src_ia_index) is still allocated;
+ * - for lengths <= 30, if that source address was freed but others
+ * remain, the glean is rebuilt from another matching address;
+ * - once the count reaches zero the special routes are removed and the
+ * prefix entry is released.
+ *
+ * NOTE(review): for lengths > 30 the prefix entry is released at the end
+ * even when ref_count is still non-zero (reachable when src_ia_index has
+ * been freed) - confirm this is intended.
+ */
+ ip_interface_prefix_key_t key = {
+ .prefix = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
+ },
+ .sw_if_index = sw_if_index,
+ };
+ /* key is built from the masked address and the interface index */
+ fib_prefix_t pfx_special = {
+ .fp_len = 32,
.fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr.ip4 = *address,
};
- if (pfx.fp_len <= 30)
+ if_prefix = ip_get_interface_prefix (lm, &key);
+ if (!if_prefix)
{
- fib_prefix_t net_pfx = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
- };
- if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
- fib_table_entry_special_remove(fib_index,
- &net_pfx,
- FIB_SOURCE_INTERFACE);
- net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
- if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
- fib_table_entry_special_remove(fib_index,
- &net_pfx,
- FIB_SOURCE_INTERFACE);
- fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+ clib_warning ("Prefix not found while deleting %U",
+ format_ip4_address_and_length, address, address_length);
+ return;
}
- else if (pfx.fp_len == 31)
+ /* one fewer address on this interface references the prefix */
+ if_prefix->ref_count -= 1;
+
+ /*
+ * Routes need to be adjusted if:
+ * - deleting last intf addr in prefix
+ * - deleting intf addr used as default source address in glean adjacency
+ *
+ * We're done now otherwise
+ */
+ if ((if_prefix->ref_count > 0) &&
+ !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
+ return;
+
+ /* length <= 30, delete glean route, first address, last address */
+ if (address_length <= 30)
{
- u32 mask = clib_host_to_net_u32(1);
- fib_prefix_t net_pfx = pfx;
- net_pfx.fp_len = 32;
- net_pfx.fp_addr.ip4.as_u32 ^= mask;
+ /* remove glean route for prefix */
+ pfx_special.fp_addr.ip4 = *address;
+ pfx_special.fp_len = address_length;
+ fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
+
+ /* if no more intf addresses in prefix, remove other special routes */
+ if (!if_prefix->ref_count)
+ {
+ /* first address in prefix */
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 & im->fib_masks[address_length];
+ pfx_special.fp_len = 32;
+
+ if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
+ fib_table_entry_special_remove (fib_index,
+ &pfx_special,
+ FIB_SOURCE_INTERFACE);
+
+ /* prefix broadcast address */
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 | ~im->fib_masks[address_length];
+ pfx_special.fp_len = 32;
- fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
+ if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
+ fib_table_entry_special_remove (fib_index,
+ &pfx_special,
+ FIB_SOURCE_INTERFACE);
+ }
+ else
+ /* default source addr just got deleted, find another */
+ {
+ ip_interface_address_t *new_src_ia = NULL;
+ ip4_address_t *new_src_addr = NULL;
+
+ new_src_addr =
+ ip4_interface_address_matching_destination
+ (im, address, sw_if_index, &new_src_ia);
+ /* NOTE(review): new_src_addr and new_src_ia are used below without a
+ * NULL check; this relies on ref_count > 0 implying that another
+ * matching address still exists on the interface - confirm */
+ if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
+
+ pfx_special.fp_len = address_length;
+ pfx_special.fp_addr.ip4 = *new_src_addr;
+
+ /* set new glean route for the prefix */
+ fib_table_entry_update_one_path (fib_index, &pfx_special,
+ FIB_SOURCE_INTERFACE,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_ATTACHED),
+ DPO_PROTO_IP4,
+ /* No next-hop address */
+ NULL,
+ sw_if_index,
+ /* invalid FIB index */
+ ~0,
+ 1,
+ /* no out-label stack */
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ return;
+ }
+ }
+ /* length == 31, delete attached route for the other address */
+ else if (address_length == 31)
+ {
+ pfx_special.fp_addr.ip4.as_u32 =
+ address->as_u32 ^ clib_host_to_net_u32(1);
+ /* fp_len is still the 32 set in the initializer */
+ fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
}
+ mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
+ pool_put (lm->if_prefix_pool, if_prefix);
+}
+
+static void
+ip4_del_interface_routes (u32 sw_if_index,
+ ip4_main_t * im,
+ u32 fib_index,
+ ip4_address_t * address, u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+ /*
+ * Remove the FIB_SOURCE_INTERFACE routes belonging to one interface
+ * address. First give up this address's share of the reference-counted
+ * per-prefix routes.
+ */
+ ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
+ address, address_length);
+ /* then delete the address's own /32 entry; the fp_len set in the
+ * initializer above is overwritten before pfx is first used */
pfx.fp_len = 32;
fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
}
+#ifndef CLIB_MARCH_VARIANT
void
ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
{
vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
sw_if_index, !is_enable, 0, 0);
+
+ {
+ ip4_enable_disable_interface_callback_t *cb;
+ vec_foreach (cb, im->enable_disable_interface_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index, is_enable);
+ }
}
static clib_error_t *
address,
address_length))
{
+ /* an intf may have >1 addr from the same prefix */
+ if ((sw_if_index == sif->sw_if_index) &&
+ (ia->address_length == address_length) &&
+ (x->as_u32 != address->as_u32))
+ continue;
+
+ /* error if the length or intf was different */
vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
return
clib_error_create
- ("failed to add %U which conflicts with %U for interface %U",
+ ("failed to add %U on %U which conflicts with %U for interface %U",
format_ip4_address_and_length, address,
address_length,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index,
format_ip4_address_and_length, x,
ia->address_length,
format_vnet_sw_if_index_name, vnm,
ip4_sw_interface_enable_disable (sw_if_index, !is_del);
- if (is_del)
- ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
- else
- ip4_add_interface_routes (sw_if_index,
- im, ip4_af.fib_index,
- pool_elt_at_index
- (lm->if_address_pool, if_address_index));
+ /* intf addr routes are added/deleted on admin up/down */
+ if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
+ {
+ if (is_del)
+ ip4_del_interface_routes (sw_if_index,
+ im, ip4_af.fib_index, address,
+ address_length);
+ else
+ ip4_add_interface_routes (sw_if_index,
+ im, ip4_af.fib_index,
+ pool_elt_at_index
+ (lm->if_address_pool, if_address_index));
+ }
/* If pool did not grow/shrink: add duplicate address. */
if (elts_before != pool_elts (lm->if_address_pool))
}
#endif
+/**
+ * IPv4 handler for software-interface admin state changes.
+ *
+ * Walks every IPv4 address configured on sw_if_index and adds its
+ * interface routes when the ADMIN_UP flag is set, or deletes them when
+ * it is clear.
+ *
+ * @param vnm          vnet main (not used by this handler)
+ * @param sw_if_index  software interface whose flags changed
+ * @param flags        new flags; only VNET_SW_INTERFACE_FLAG_ADMIN_UP is read
+ * @return always 0
+ */
+static clib_error_t *
+ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  ip4_main_t *im = &ip4_main;
+  ip_interface_address_t *ia;
+  ip4_address_t *addr;
+  u32 fib_index, going_up;
+
+  /* Grow the per-interface vectors so sw_if_index is a valid index. */
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate_init_empty (im->
+			   lookup_main.if_address_pool_index_by_sw_if_index,
+			   sw_if_index, ~0);
+
+  going_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
+  fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+                                0 /* honor unnumbered */,
+  ({
+    addr = ip_interface_address_get_address (&im->lookup_main, ia);
+    if (!going_up)
+      ip4_del_interface_routes (sw_if_index, im, fib_index,
+                                addr, ia->address_length);
+    else
+      ip4_add_interface_routes (sw_if_index, im, fib_index, ia);
+  }));
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
+
/* Built-in ip4 unicast rx feature path definition */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
.arc_name = "ip4-unicast",
.start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .last_in_arc = "ip4-lookup", /* NOTE(review): presumably pins ip4-lookup as
+ * the final node of this arc - confirm against the feature-arc code */
.arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};
{
.arc_name = "ip4-unicast",
.node_name = "ip4-policer-classify",
- .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
+ .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};
VNET_FEATURE_INIT (ip4_ipsec, static) =
{
.arc_name = "ip4-unicast",
- .node_name = "ipsec-input-ip4",
+ .node_name = "ipsec4-input-feature", /* same node name that the
+ * ip4-policer-classify feature lists in its runs_before */
.runs_before = VNET_FEATURES ("vpath-input-ip4"),
};
{
.arc_name = "ip4-multicast",
.start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .last_in_arc = "ip4-mfib-forward-lookup",
.arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};
{
.arc_name = "ip4-output",
.start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
+ .last_in_arc = "interface-output",
.arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};
{
.arc_name = "ip4-output",
.node_name = "ip4-outacl",
- .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
+ .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};
VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
.arc_name = "ip4-output",
- .node_name = "ipsec-output-ip4",
+ .node_name = "ipsec4-output-feature", /* same node name that the ip4-outacl
+ * feature lists in its runs_before */
.runs_before = VNET_FEATURES ("interface-output"),
};
VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
/* Global IP4 main. */
+#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
+#endif /* CLIB_MARCH_VARIANT */
static clib_error_t *
ip4_lookup_init (vlib_main_t * vm)
{
ethernet_arp_header_t h;
- memset (&h, 0, sizeof (h));
-
- /* Set target ethernet address to all zeros. */
- memset (h.ip4_over_ethernet[1].ethernet, 0,
- sizeof (h.ip4_over_ethernet[1].ethernet));
+ clib_memset (&h, 0, sizeof (h));
#define _16(f,v) h.f = clib_host_to_net_u16 (v);
#define _8(f,v) h.f = v;
vec_elt (im->fib_index_by_sw_if_index,
vnet_buffer (b0)->sw_if_index[VLIB_RX]);
- clib_memcpy (t0->packet_data,
- vlib_buffer_get_current (b0),
- sizeof (t0->packet_data));
+ clib_memcpy_fast (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
}
if (b1->flags & VLIB_BUFFER_IS_TRACED)
{
(u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
vec_elt (im->fib_index_by_sw_if_index,
vnet_buffer (b1)->sw_if_index[VLIB_RX]);
- clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
- sizeof (t1->packet_data));
+ clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
+ sizeof (t1->packet_data));
}
from += 2;
n_left -= 2;
(u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
vec_elt (im->fib_index_by_sw_if_index,
vnet_buffer (b0)->sw_if_index[VLIB_RX]);
- clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
- sizeof (t0->packet_data));
+ clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
}
from += 1;
n_left -= 1;
u32 ip_header_length, payload_length_host_byte_order;
u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
u16 sum16;
- void *data_this_buffer;
+ u8 *data_this_buffer;
+ u8 length_odd;
/* Initialize checksum with ip header. */
ip_header_length = ip4_header_bytes (ip0);
ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
n_bytes_left = n_this_buffer = payload_length_host_byte_order;
- data_this_buffer = (void *) ip0 + ip_header_length;
+ data_this_buffer = (u8 *) ip0 + ip_header_length;
n_ip_bytes_this_buffer =
p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
n_ip_bytes_this_buffer - ip_header_length : 0;
}
+
while (1)
{
sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
break;
ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+ if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ return 0xfefe;
+
+ length_odd = (n_this_buffer & 1);
+
p0 = vlib_get_buffer (vm, p0->next_buffer);
data_this_buffer = vlib_buffer_get_current (p0);
- n_this_buffer = p0->current_length;
+ n_this_buffer = clib_min (p0->current_length, n_bytes_left);
+
+ if (PREDICT_FALSE (length_odd))
+ {
+ /* Prepend a 0 or the resulting checksum will be incorrect. */
+ data_this_buffer--;
+ n_this_buffer++;
+ n_bytes_left++;
+ data_this_buffer[0] = 0;
+ }
}
sum16 = ~ip_csum_fold (sum0);
-
return sum16;
}
{
.arc_name = "ip4-local",
.start_nodes = VNET_FEATURES ("ip4-local"),
+ .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
ip4_address_t src;
u32 lbi;
u8 error;
+ u8 first;
} ip4_local_last_check_t;
static inline void
vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
- if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
+ /*
+ * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
+ * adjacency for the destination address (the local interface address).
+ * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
+ * adjacency for the source address (the remote sender's address)
+ */
+ if (PREDICT_FALSE (last_check->first ||
+ (last_check->src.as_u32 != ip0->src_address.as_u32)))
{
mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (b)->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b)->ip.adj_index[VLIB_TX];
vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
- vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
lb0 = load_balance_get (lbi0);
dpo0 = load_balance_get_bucket_i (lb0, 0);
}
else
{
+ vnet_buffer (b)->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b)->ip.adj_index[VLIB_TX];
vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
- vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
*error0 = last_check->error;
+ last_check->first = 0;
}
}
ip4_fib_mtrie_t *mtrie[2];
const dpo_id_t *dpo[2];
load_balance_t *lb[2];
- u32 not_last_hit = 0;
+ u32 not_last_hit;
u32 lbi[2];
+ not_last_hit = last_check->first;
not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+ /*
+ * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
+ * adjacency for the destination address (the local interface address).
+ * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
+ * adjacency for the source address (the remote sender's address)
+ */
if (PREDICT_FALSE (not_last_hit))
{
mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
+ vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
- vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
+ vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
- vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
lb[0] = load_balance_get (lbi[0]);
lb[1] = load_balance_get (lbi[1]);
}
else
{
+ vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
- vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
+ vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
- vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
error[0] = last_check->error;
error[1] = last_check->error;
+ last_check->first = 0;
}
}
+enum ip_local_packet_type_e
+{ /* Packet classes returned by ip4_local_classify(). Callers test the
+ * returned value for zero to decide whether to run the checksum and
+ * source checks, so L4 must remain the first (zero) enumerator. */
+ IP_LOCAL_PACKET_TYPE_L4, /* neither a fragment nor flagged as NATed */
+ IP_LOCAL_PACKET_TYPE_NAT, /* buffer has VNET_BUFFER_F_IS_NATED set */
+ IP_LOCAL_PACKET_TYPE_FRAG, /* ip4_is_fragment() is true */
+};
+
+/**
+ * Classify a packet arriving at ip4-local and pick its next node.
+ *
+ * Fragments are steered to the reassembly next; every other packet is
+ * dispatched through the per-protocol next table.  Callers are expected
+ * to skip checksum and source checks for anything that is not L4.
+ *
+ * @param b     buffer carrying the packet (only its flags are read)
+ * @param ip    IPv4 header of the packet
+ * @param next  [out] next-node index chosen for the packet
+ * @return one of the IP_LOCAL_PACKET_TYPE_* values
+ */
+always_inline u8
+ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
+{
+  u8 type;
+
+  if (PREDICT_FALSE (ip4_is_fragment (ip)))
+    {
+      /* fragments never consult the protocol table */
+      *next = IP_LOCAL_NEXT_REASSEMBLY;
+      type = IP_LOCAL_PACKET_TYPE_FRAG;
+    }
+  else
+    {
+      *next = ip4_main.lookup_main.local_next_by_ip_protocol[ip->protocol];
+      type = PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED) ?
+	IP_LOCAL_PACKET_TYPE_NAT : IP_LOCAL_PACKET_TYPE_L4;
+    }
+
+  return type;
+}
+
static inline uword
ip4_local_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame, int head_of_feature_arc)
{
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
u32 *from, n_left_from;
vlib_node_runtime_t *error_node =
vlib_node_get_runtime (vm, ip4_input_node.index);
u16 nexts[VLIB_FRAME_SIZE], *next;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
ip4_header_t *ip[2];
- u8 error[2];
+ u8 error[2], pt[2];
ip4_local_last_check_t last_check = {
+ /*
+ * 0.0.0.0 can appear as the source address of an IP packet,
+ * as can any other address, hence the need to use the 'first'
+ * member to make sure the .lbi is initialised for the first
+ * packet.
+ */
.src = {.as_u32 = 0},
.lbi = ~0,
- .error = IP4_ERROR_UNKNOWN_PROTOCOL
+ .error = IP4_ERROR_UNKNOWN_PROTOCOL,
+ .first = 1,
};
from = vlib_frame_vector_args (frame);
while (n_left_from >= 6)
{
- u32 is_nat, not_batch = 0;
+ u8 not_batch = 0;
/* Prefetch next iteration. */
{
vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
- is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
- not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
+ pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
+ pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
+
+ not_batch = pt[0] ^ pt[1];
- if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
+ if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
goto skip_checks;
if (PREDICT_TRUE (not_batch == 0))
}
else
{
- if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
+ if (!pt[0])
{
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
}
- if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
+ if (!pt[1])
{
ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
skip_checks:
- next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
- next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
head_of_feature_arc);
ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
ip[0] = vlib_buffer_get_current (b[0]);
vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
+ pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
- if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
+ if (head_of_feature_arc == 0 || pt[0])
goto skip_check;
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
skip_check:
- next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
head_of_feature_arc);
[IP_LOCAL_NEXT_PUNT] = "ip4-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
},
};
/* *INDENT-ON* */
lm->local_next_by_ip_protocol[protocol] =
vlib_node_add_next (vm, ip4_local_node.index, node_index);
}
+
+/**
+ * Reset the ip4-local dispatch entry for an IP protocol so that it
+ * points at the punt next (IP_LOCAL_NEXT_PUNT).
+ *
+ * @param protocol  IP protocol number; asserted to be a valid index into
+ *                  local_next_by_ip_protocol
+ */
+void
+ip4_unregister_protocol (u32 protocol)
+{
+  ip_lookup_main_t *lm = &ip4_main.lookup_main;
+
+  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+
+  lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
+}
#endif
static clib_error_t *
u32 node_index = vlib_get_node (vm,
ip4_local_node.index)->
next_nodes[lm->local_next_by_ip_protocol[i]];
- vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
- node_index);
+ vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
+ format_vlib_node_name, vm, node_index);
}
}
return 0;
u32 *from, *to_next_drop;
uword n_left_from, n_left_to_next_drop, next_index;
u32 thread_index = vm->thread_index;
- u32 seed;
- f64 time_now;
+ u64 seed;
if (node->flags & VLIB_NODE_FLAG_TRACE)
ip4_forward_next_trace (vm, node, frame, VLIB_TX);
- time_now = vlib_time_now (vm);
- if (time_now - im->arp_throttle_last_seed_change_time[thread_index] > 1e-3)
- {
- (void) random_u32 (&im->arp_throttle_seeds[thread_index]);
- memset (im->arp_throttle_bitmaps[thread_index], 0,
- ARP_THROTTLE_BITS / BITS (u8));
-
- im->arp_throttle_last_seed_change_time[thread_index] = time_now;
- }
- seed = im->arp_throttle_seeds[thread_index];
+ seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
while (n_left_from > 0 && n_left_to_next_drop > 0)
{
- u32 pi0, adj_index0, r0, w0, sw_if_index0, drop0;
- uword m0;
+ u32 pi0, bi0, adj_index0, sw_if_index0;
ip_adjacency_t *adj0;
- vlib_buffer_t *p0;
- ip4_header_t *ip0;
+ vlib_buffer_t *p0, *b0;
+ ip4_address_t resolve0;
+ ethernet_arp_header_t *h0;
+ vnet_hw_interface_t *hw_if0;
+ u64 r0;
pi0 = from[0];
-
p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj0 = adj_get (adj_index0);
- ip0 = vlib_buffer_get_current (p0);
- sw_if_index0 = adj0->rewrite_header.sw_if_index;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
-
- if (PREDICT_TRUE (is_glean))
+ if (is_glean)
{
- /*
- * this is the Glean case, so we are ARPing for the
- * packet's destination
- */
- r0 = ip0->dst_address.data_u32;
+ /* resolve the packet's destination */
+ ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+ resolve0 = ip0->dst_address;
}
else
{
- r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32;
+ /* resolve the incomplete adj */
+ resolve0 = adj0->sub_type.nbr.next_hop.ip4;
}
- r0 ^= seed;
- /* Select bit number */
- r0 &= ARP_THROTTLE_BITS - 1;
- w0 = r0 / BITS (uword);
- m0 = (uword) 1 << (r0 % BITS (uword));
-
- drop0 = (im->arp_throttle_bitmaps[thread_index][w0] & m0) != 0;
- im->arp_throttle_bitmaps[thread_index][w0] |= m0;
-
- from += 1;
- n_left_from -= 1;
- to_next_drop[0] = pi0;
- to_next_drop += 1;
- n_left_to_next_drop -= 1;
+ /* combine the address and interface for the hash key */
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ r0 = (u64) resolve0.data_u32 << 32;
+ r0 |= sw_if_index0;
- p0->error =
- node->errors[drop0 ? IP4_ARP_ERROR_DROP :
- IP4_ARP_ERROR_REQUEST_SENT];
+ if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
+ {
+ p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
+ continue;
+ }
/*
* the adj has been updated to a rewrite but the node the DPO that got
* us here hasn't - yet. no big deal. we'll drop while we wait.
*/
if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
- continue;
-
- if (drop0)
- continue;
+ {
+ p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
+ continue;
+ }
/*
* Can happen if the control-plane is programming tables
|| (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
{
p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ continue;
}
- else
- /* Send ARP request. */
+ /* Send ARP request. */
+ h0 =
+ vlib_packet_template_get_packet (vm,
+ &im->ip4_arp_request_packet_template,
+ &bi0);
+ /* Seems we're out of buffers */
+ if (PREDICT_FALSE (!h0))
{
- u32 bi0 = 0;
- vlib_buffer_t *b0;
- ethernet_arp_header_t *h0;
- vnet_hw_interface_t *hw_if0;
-
- h0 =
- vlib_packet_template_get_packet (vm,
- &im->ip4_arp_request_packet_template,
- &bi0);
+ p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
+ continue;
+ }
- /* Seems we're out of buffers */
- if (PREDICT_FALSE (!h0))
- continue;
+ b0 = vlib_get_buffer (vm, bi0);
- /* Add rewrite/encap string for ARP packet. */
- vnet_rewrite_one_header (adj0[0], h0,
- sizeof (ethernet_header_t));
+ /* copy the persistent fields from the original */
+ clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
- hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ /* Add rewrite/encap string for ARP packet. */
+ vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
- /* Src ethernet address in ARP header. */
- clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
- hw_if0->hw_address,
- sizeof (h0->ip4_over_ethernet[0].ethernet));
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
- if (is_glean)
- {
- /* The interface's source address is stashed in the Glean Adj */
- h0->ip4_over_ethernet[0].ip4 =
- adj0->sub_type.glean.receive_addr.ip4;
-
- /* Copy in destination address we are requesting. This is the
- * glean case, so it's the packet's destination.*/
- h0->ip4_over_ethernet[1].ip4.data_u32 =
- ip0->dst_address.data_u32;
- }
- else
+ /* Src ethernet address in ARP header. */
+ mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
+ hw_if0->hw_address);
+ if (is_glean)
+ {
+ /* The interface's source address is stashed in the Glean Adj */
+ h0->ip4_over_ethernet[0].ip4 =
+ adj0->sub_type.glean.receive_addr.ip4;
+ }
+ else
+ {
+ /* Src IP address in ARP header. */
+ if (ip4_src_address_for_packet (lm, sw_if_index0,
+ &h0->ip4_over_ethernet[0].ip4))
{
- /* Src IP address in ARP header. */
- if (ip4_src_address_for_packet (lm, sw_if_index0,
- &h0->
- ip4_over_ethernet[0].ip4))
- {
- /* No source address available */
- p0->error =
- node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
- vlib_buffer_free (vm, &bi0, 1);
- continue;
- }
-
- /* Copy in destination address we are requesting from the
- incomplete adj */
- h0->ip4_over_ethernet[1].ip4.data_u32 =
- adj0->sub_type.nbr.next_hop.ip4.as_u32;
+ /* No source address available */
+ p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free (vm, &bi0, 1);
+ continue;
}
+ }
+ h0->ip4_over_ethernet[1].ip4 = resolve0;
- vlib_buffer_copy_trace_flag (vm, p0, bi0);
- b0 = vlib_get_buffer (vm, bi0);
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
- vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
- vlib_set_next_frame_buffer (vm, node,
- adj0->rewrite_header.next_index,
- bi0);
- }
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ vlib_set_next_frame_buffer (vm, node,
+ adj0->rewrite_header.next_index, bi0);
}
vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
}
+/* Descriptive strings, one per IP4_ARP_ERROR_* code. Designated
+ initialisers tie each string to its code regardless of ordering. */
static char *ip4_arp_error_strings[] = {
- [IP4_ARP_ERROR_DROP] = "address overflow drops",
+ [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
+ [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
+ [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
[IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
[IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
- [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
- [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
[IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
/* *INDENT-ON* */
+/* List of IP4_ARP_ERROR_ suffixes, each wrapped in _() so the user of the
+ macro decides what to generate per entry.
+ NOTE(review): presumably consumed by arp_notrace_init; its body is not
+ visible here - confirm. */
#define foreach_notrace_ip4_arp_error \
-_(DROP) \
+_(THROTTLED) \
+_(RESOLVED) \
+_(NO_BUFFERS) \
_(REQUEST_SENT) \
-_(REPLICATE_DROP) \
-_(REPLICATE_FAIL)
+_(NON_ARP_ADJ) \
+_(NO_SOURCE_ADDRESS)
static clib_error_t *
arp_notrace_init (vlib_main_t * vm)
sw_if_index);
}
- clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
- sizeof (h->ip4_over_ethernet[0].ethernet));
+ mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
h->ip4_over_ethernet[0].ip4 = src[0];
h->ip4_over_ethernet[1].ip4 = dst[0];
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
- u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
+ u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
{
if (packet_len > adj_packet_bytes)
{
else
{
/* IP fragmentation */
- ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
- IP4_FRAG_NEXT_IP4_LOOKUP, 0);
+ ip_frag_set_vnet_buffer (b, adj_packet_bytes,
+ IP4_FRAG_NEXT_IP4_REWRITE, 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
}
}
-always_inline uword
-ip4_rewrite_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- int do_counters, int is_midchain, int is_mcast)
+/* Decrement TTL & update checksum.
+ Works either endian, so no need for byte swap.
+
+ Buffers flagged VNET_BUFFER_F_LOCALLY_ORIGINATED are left unmodified
+ apart from having that flag cleared. For every other buffer the TTL is
+ decremented and the header checksum adjusted incrementally. If the TTL
+ reaches zero, *error and *next are set and the buffer is marked up for
+ an ICMP time-exceeded reply; otherwise *error and *next are not
+ written. */
+static_always_inline void
+ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
+ u32 * error)
{
- ip_lookup_main_t *lm = &ip4_main.lookup_main;
- u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
-
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
-
- while (n_left_from > 0)
+ i32 ttl;
+ u32 checksum;
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- ip_adjacency_t *adj0, *adj1;
- vlib_buffer_t *p0, *p1;
- ip4_header_t *ip0, *ip1;
- u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
- u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
- u32 tx_sw_if_index0, tx_sw_if_index1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
-
- CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
- }
-
- pi0 = to_next[0] = from[0];
- pi1 = to_next[1] = from[1];
+ b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ return;
+ }
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
+ ttl = ip->ttl;
- p0 = vlib_get_buffer (vm, pi0);
- p1 = vlib_get_buffer (vm, pi1);
+ /* Input node should have rejected packets with ttl 0. */
+ ASSERT (ip->ttl > 0);
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
+ checksum += checksum >= 0xffff; /* adds 1 once the 32-bit sum reaches 0xffff */
- /*
- * pre-fetch the per-adjacency counters
- */
- if (do_counters)
- {
- vlib_prefetch_combined_counter (&adjacency_counters,
- thread_index, adj_index0);
- vlib_prefetch_combined_counter (&adjacency_counters,
- thread_index, adj_index1);
- }
+ ip->checksum = checksum;
+ ttl -= 1;
+ ip->ttl = ttl;
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response. The header has already been updated above
+ * by the time this check runs.
+ */
+ if (PREDICT_FALSE (ttl <= 0))
+ {
+ *error = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ *next = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
- error0 = error1 = IP4_ERROR_NONE;
- next0 = next1 = IP4_REWRITE_NEXT_DROP;
+ /* Verify checksum; buffers flagged for IP checksum offload are exempt. */
+ ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
+ (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+}
- /* Decrement TTL & update checksum.
- Works either endian, so no need for byte swap. */
- if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
- {
- i32 ttl0 = ip0->ttl;
- /* Input node should have reject packets with ttl 0. */
- ASSERT (ip0->ttl > 0);
+/**
+ * @brief Shared worker for the IPv4 rewrite path.
+ *
+ * For every buffer in the frame: update TTL/checksum
+ * (ip4_ttl_and_checksum_check), check the packet length against the
+ * adjacency's max_l3_packet_bytes (ip4_mtu_check) and, when no error was
+ * raised, prepend room for the adjacency rewrite, set the TX interface and
+ * pick the adjacency's next node (optionally starting the output feature
+ * arc). On error the buffer's error field is set and the next index keeps
+ * whatever the checks chose, or the IP4_REWRITE_NEXT_DROP preset.
+ *
+ * @param do_counters  bump the per-adjacency combined counters.
+ * @param is_midchain  run calc_checksums and the adjacency's midchain
+ *                     fixup_func (when set).
+ * @param is_mcast     flag packets going back out their RX interface and
+ *                     patch the multicast MAC from the IP destination.
+ * @param do_gso       for buffers flagged VNET_BUFFER_F_GSO, use
+ *                     gso_mtu_sz() instead of the IP length for the MTU
+ *                     check.
+ * @return frame->n_vectors.
+ */
+always_inline uword
+ip4_rewrite_inline_with_gso (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int do_counters, int is_midchain, int is_mcast,
+ int do_gso)
+{
+ ip_lookup_main_t *lm = &ip4_main.lookup_main;
+ u32 *from = vlib_frame_vector_args (frame);
+ /* Cursors are initialised here rather than inside the CLIB_N_PREFETCHES
+ * branches below, so the final single-buffer loop is well defined even
+ * when neither branch is compiled in. */
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 n_left_from;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
- checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
- checksum0 += checksum0 >= 0xffff;
+ n_left_from = frame->n_vectors;
+ u32 thread_index = vm->thread_index;
- ip0->checksum = checksum0;
- ttl0 -= 1;
- ip0->ttl = ttl0;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
- /*
- * If the ttl drops below 1 when forwarding, generate
- * an ICMP response.
- */
- if (PREDICT_FALSE (ttl0 <= 0))
- {
- error0 = IP4_ERROR_TIME_EXPIRED;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
- }
+#if (CLIB_N_PREFETCHES >= 8)
+ if (n_left_from >= 6)
+ {
+ int i;
+ for (i = 2; i < 6; i++)
+ vlib_prefetch_buffer_header (bufs[i], LOAD);
+ }
- /* Verify checksum. */
- ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
- (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
- }
- else
- {
- p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
- }
- if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
- {
- i32 ttl1 = ip1->ttl;
+ while (n_left_from >= 8)
+ {
+ ip_adjacency_t *adj0, *adj1;
+ ip4_header_t *ip0, *ip1;
+ u32 rw_len0, error0, adj_index0;
+ u32 rw_len1, error1, adj_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+ u8 *p;
- /* Input node should have reject packets with ttl 0. */
- ASSERT (ip1->ttl > 0);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
- checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
- checksum1 += checksum1 >= 0xffff;
+ adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
- ip1->checksum = checksum1;
- ttl1 -= 1;
- ip1->ttl = ttl1;
+ /*
+ * pre-fetch the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index0);
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index1);
+ }
- /*
- * If the ttl drops below 1 when forwarding, generate
- * an ICMP response.
- */
- if (PREDICT_FALSE (ttl1 <= 0))
- {
- error1 = IP4_ERROR_TIME_EXPIRED;
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
- }
+ ip0 = vlib_buffer_get_current (b[0]);
+ ip1 = vlib_buffer_get_current (b[1]);
+
+ error0 = error1 = IP4_ERROR_NONE;
+
+ ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
+ ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
+
+ /* Rewrite packet header and updates lengths. */
+ adj0 = adj_get (adj_index0);
+ adj1 = adj_get (adj_index1);
+
+ /* Worth pipelining. No guarantee that adj0,1 are hot... */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+ vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
+ vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
+
+ p = vlib_buffer_get_current (b[2]);
+ CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+
+ p = vlib_buffer_get_current (b[3]);
+ CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+
+ /* Check MTU of outgoing interface. */
+ u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+ u16 ip1_len = clib_net_to_host_u16 (ip1->length);
+
+ if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (b[0]);
+ if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
+ ip1_len = gso_mtu_sz (b[1]);
+
+ ip4_mtu_check (b[0], ip0_len,
+ adj0[0].rewrite_header.max_l3_packet_bytes,
+ ip0->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ next + 0, &error0);
+ ip4_mtu_check (b[1], ip1_len,
+ adj1[0].rewrite_header.max_l3_packet_bytes,
+ ip1->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ next + 1, &error1);
+
+ if (is_mcast)
+ {
+ error0 = ((adj0[0].rewrite_header.sw_if_index ==
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
+ IP4_ERROR_SAME_INTERFACE : error0);
+ error1 = ((adj1[0].rewrite_header.sw_if_index ==
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
+ IP4_ERROR_SAME_INTERFACE : error1);
+ }
- /* Verify checksum. */
- ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
- (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
- }
- else
- {
- p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
- }
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP header */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ u32 next_index = adj0[0].rewrite_header.next_index;
+ vlib_buffer_advance (b[0], -(word) rw_len0);
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next_index, b[0]);
+ next[0] = next_index;
+ }
+ else
+ {
+ b[0]->error = error_node->errors[error0];
+ }
+ if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
+ {
+ u32 next_index = adj1[0].rewrite_header.next_index;
+ vlib_buffer_advance (b[1], -(word) rw_len1);
- /* Rewrite packet header and updates lengths. */
- adj0 = adj_get (adj_index0);
- adj1 = adj_get (adj_index1);
-
- /* Worth pipelining. No guarantee that adj0,1 are hot... */
- rw_len0 = adj0[0].rewrite_header.data_bytes;
- rw_len1 = adj1[0].rewrite_header.data_bytes;
- vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
- vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
-
- /* Check MTU of outgoing interface. */
- ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
- adj0[0].rewrite_header.max_l3_packet_bytes,
- ip0->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- &next0, &error0);
- ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
- adj1[0].rewrite_header.max_l3_packet_bytes,
- ip1->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- &next1, &error1);
-
- if (is_mcast)
- {
- error0 = ((adj0[0].rewrite_header.sw_if_index ==
- vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
- IP4_ERROR_SAME_INTERFACE : error0);
- error1 = ((adj1[0].rewrite_header.sw_if_index ==
- vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
- IP4_ERROR_SAME_INTERFACE : error1);
- }
+ tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
- p0->error = error_node->errors[error0];
- p1->error = error_node->errors[error1];
- /* Don't adjust the buffer for ttl issue; icmp-error node wants
- * to see the IP headerr */
- if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
- {
- next0 = adj0[0].rewrite_header.next_index;
- p0->current_data -= rw_len0;
- p0->current_length += rw_len0;
- tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
-
- if (PREDICT_FALSE
- (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start (lm->output_feature_arc_index,
- tx_sw_if_index0, &next0, p0);
- }
- if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
- {
- next1 = adj1[0].rewrite_header.next_index;
- p1->current_data -= rw_len1;
- p1->current_length += rw_len1;
+ if (PREDICT_FALSE
+ (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index1, &next_index, b[1]);
+ next[1] = next_index;
+ }
+ else
+ {
+ b[1]->error = error_node->errors[error1];
+ }
+ if (is_midchain)
+ {
+ calc_checksums (vm, b[0]);
+ calc_checksums (vm, b[1]);
+ }
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
- tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+ /*
+ * Bump the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
- if (PREDICT_FALSE
- (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start (lm->output_feature_arc_index,
- tx_sw_if_index1, &next1, p1);
- }
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
+ }
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_two_headers (adj0[0], adj1[0],
- ip0, ip1, sizeof (ethernet_header_t));
+ if (is_midchain)
+ {
+ if (adj0->sub_type.midchain.fixup_func)
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
+ if (adj1->sub_type.midchain.fixup_func)
+ adj1->sub_type.midchain.fixup_func
+ (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
+ }
+ if (is_mcast)
+ {
/*
- * Bump the per-adjacency counters
+ * copy bytes from the IP address into the MAC rewrite
*/
- if (do_counters)
- {
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index,
- adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0) + rw_len0);
-
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index,
- adj_index1, 1,
- vlib_buffer_length_in_chain (vm, p1) + rw_len1);
- }
-
- if (is_midchain)
- {
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- adj1->sub_type.midchain.fixup_func
- (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
- {
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->
- rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32,
- (u8 *) ip0);
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->
- rewrite_header.dst_mcast_offset,
- &ip1->dst_address.as_u32,
- (u8 *) ip1);
- }
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj1->rewrite_header.dst_mcast_offset,
+ &ip1->dst_address.as_u32, (u8 *) ip1);
+ }
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
+ next += 2;
+ b += 2;
+ n_left_from -= 2;
+ }
+#elif (CLIB_N_PREFETCHES >= 4)
+ while (n_left_from >= 1)
+ {
+ ip_adjacency_t *adj0;
+ ip4_header_t *ip0;
+ u32 rw_len0, error0, adj_index0;
+ u32 tx_sw_if_index0;
+ u8 *p;
+
+ /* Prefetch next iteration */
+ if (PREDICT_TRUE (n_left_from >= 4))
+ {
+ ip_adjacency_t *adj2;
+ u32 adj_index2;
+
+ vlib_prefetch_buffer_header (b[3], LOAD);
+ vlib_prefetch_buffer_data (b[2], LOAD);
+
+ /* Prefetch adj->rewrite_header */
+ adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
+ adj2 = adj_get (adj_index2);
+ p = (u8 *) adj2;
+ CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
+ LOAD);
}
- while (n_left_from > 0 && n_left_to_next > 0)
+ adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+
+ /*
+ * Prefetch the per-adjacency counters
+ */
+ if (do_counters)
{
- ip_adjacency_t *adj0;
- vlib_buffer_t *p0;
- ip4_header_t *ip0;
- u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
- u32 tx_sw_if_index0;
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index0);
+ }
- pi0 = to_next[0] = from[0];
+ ip0 = vlib_buffer_get_current (b[0]);
- p0 = vlib_get_buffer (vm, pi0);
+ error0 = IP4_ERROR_NONE;
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
- adj0 = adj_get (adj_index0);
+ /* Rewrite packet header and updates lengths. */
+ adj0 = adj_get (adj_index0);
- ip0 = vlib_buffer_get_current (p0);
+ /* Rewrite header was prefetched. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
- error0 = IP4_ERROR_NONE;
- next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
+ /* Check MTU of outgoing interface. */
+ u16 ip0_len = clib_net_to_host_u16 (ip0->length);
- /* Decrement TTL & update checksum. */
- if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
- {
- i32 ttl0 = ip0->ttl;
+ if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (b[0]);
- checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+ ip4_mtu_check (b[0], ip0_len,
+ adj0[0].rewrite_header.max_l3_packet_bytes,
+ ip0->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ next + 0, &error0);
- checksum0 += checksum0 >= 0xffff;
+ if (is_mcast)
+ {
+ error0 = ((adj0[0].rewrite_header.sw_if_index ==
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
+ IP4_ERROR_SAME_INTERFACE : error0);
+ }
- ip0->checksum = checksum0;
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP header */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ u32 next_index = adj0[0].rewrite_header.next_index;
+ vlib_buffer_advance (b[0], -(word) rw_len0);
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next_index, b[0]);
+ next[0] = next_index;
+ }
+ else
+ {
+ b[0]->error = error_node->errors[error0];
+ }
+ if (is_midchain)
+ {
+ calc_checksums (vm, b[0]);
+ }
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
- ASSERT (ip0->ttl > 0);
+ /*
+ * Bump the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+ }
- ttl0 -= 1;
+ if (is_midchain)
+ {
+ if (adj0->sub_type.midchain.fixup_func)
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
+ }
- ip0->ttl = ttl0;
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ }
- ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
- (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+ next += 1;
+ b += 1;
+ n_left_from -= 1;
+ }
+#endif
- if (PREDICT_FALSE (ttl0 <= 0))
- {
- /*
- * If the ttl drops below 1 when forwarding, generate
- * an ICMP response.
- */
- error0 = IP4_ERROR_TIME_EXPIRED;
- next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- }
- }
- else
- {
- p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
- }
+ /* Whatever the prefetching loops above left over, one buffer at a time. */
+ while (n_left_from > 0)
+ {
+ ip_adjacency_t *adj0;
+ ip4_header_t *ip0;
+ u32 rw_len0, adj_index0, error0;
+ u32 tx_sw_if_index0;
- if (do_counters)
- vlib_prefetch_combined_counter (&adjacency_counters,
- thread_index, adj_index0);
+ adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
- if (is_mcast)
- {
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->
- rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32,
- (u8 *) ip0);
- }
+ adj0 = adj_get (adj_index0);
- /* Update packet buffer attributes/set output interface. */
- rw_len0 = adj0[0].rewrite_header.data_bytes;
- vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+ if (do_counters)
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ thread_index, adj_index0);
- if (do_counters)
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ ip0 = vlib_buffer_get_current (b[0]);
- /* Check MTU of outgoing interface. */
- ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
- adj0[0].rewrite_header.max_l3_packet_bytes,
- ip0->flags_and_fragment_offset &
- clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- &next0, &error0);
+ error0 = IP4_ERROR_NONE;
- if (is_mcast)
- {
- error0 = ((adj0[0].rewrite_header.sw_if_index ==
- vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
- IP4_ERROR_SAME_INTERFACE : error0);
- }
- p0->error = error_node->errors[error0];
+ ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
- /* Don't adjust the buffer for ttl issue; icmp-error node wants
- * to see the IP headerr */
- if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
- {
- p0->current_data -= rw_len0;
- p0->current_length += rw_len0;
- tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
- next0 = adj0[0].rewrite_header.next_index;
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
- if (is_midchain)
- {
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- }
+ /* Check MTU of outgoing interface. */
+ u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+ if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+ ip0_len = gso_mtu_sz (b[0]);
- if (PREDICT_FALSE
- (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start (lm->output_feature_arc_index,
- tx_sw_if_index0, &next0, p0);
+ ip4_mtu_check (b[0], ip0_len,
+ adj0[0].rewrite_header.max_l3_packet_bytes,
+ ip0->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ next + 0, &error0);
- }
+ if (is_mcast)
+ {
+ error0 = ((adj0[0].rewrite_header.sw_if_index ==
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
+ IP4_ERROR_SAME_INTERFACE : error0);
+ }
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP header */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ u32 next_index = adj0[0].rewrite_header.next_index;
+ vlib_buffer_advance (b[0], -(word) rw_len0);
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ if (PREDICT_FALSE
+ (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next_index, b[0]);
+ next[0] = next_index;
+ }
+ else
+ {
+ b[0]->error = error_node->errors[error0];
+ }
+ if (is_midchain)
+ {
+ calc_checksums (vm, b[0]);
+ }
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ if (do_counters)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- pi0, next0);
+ if (is_midchain)
+ {
+ if (adj0->sub_type.midchain.fixup_func)
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ }
+
+ next += 1;
+ b += 1;
+ n_left_from -= 1;
}
+
/* Need to do trace after rewrites to pick up new packet data. */
if (node->flags & VLIB_NODE_FLAG_TRACE)
ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
+/**
+ * Front end for ip4_rewrite_inline_with_gso: picks the GSO-aware variant
+ * only when at least one GSO interface is configured.
+ *
+ * do_gso is passed as a literal at each call site so the always_inline
+ * callee can be specialised for that value.
+ */
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  /* Uncommon case: some interface has GSO enabled. */
+  if (PREDICT_FALSE
+      (vnet_get_main ()->interface_main.gso_interface_count > 0))
+    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                        is_midchain, is_mcast,
+                                        1 /* do_gso */ );
+
+  return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
+                                      is_midchain, is_mcast,
+                                      0 /* no do_gso */ );
+}
+
/** @brief IPv4 rewrite node.
@node ip4-rewrite