FIB2.0: Adjacency complete pull model (VPP-487) 99/3399/4
authorNeale Ranns <nranns@cisco.com>
Sat, 8 Oct 2016 12:03:40 +0000 (13:03 +0100)
committerDave Barach <openvpp@barachs.net>
Fri, 14 Oct 2016 13:50:39 +0000 (13:50 +0000)
Change the adjacency completion model to pull not push.

A complete adjacency has a rewrite string; an incomplete one does not. The rewrite string for a peer either comes from a discovery protocol (e.g. ARP/ND) or can be directly derived from the link type (e.g. GRE tunnels). Which method applies is interface-type specific.
For each packet type sent on a link to a peer there is a corresponding adjacency. For example, if there is a peer 10.0.0.1 on Eth0 and we need to send it IPv4 and MPLS packets, there will be two adjacencies: one for the IPv4 packets and one for the MPLS packets. The adjacencies are thus distinguished by the packets they carry; this is known as the adjacency's 'link-type'. It is not an L3 packet type, since the adjacency can have a link type of Ethernet (for L2 over GRE).
The discovery protocols are not aware of all the link types required - only the FIB is. The FIB will create adjacencies as and when they are required, and it is thus desirable to 'pull' the required rewrite from the discovery protocol. The alternative (that we have now) is that the discovery protocol pushes (i.e. creates) adjacencies for each link type - this creates more adjacencies than we need.
To pull, the FIB now asks the interface type to 'complete' the adjacency. The interface can then delegate to the discovery protocol (on Ethernet links) or build the rewrite directly (e.g. on GRE).

Change-Id: I61451789ae03f26b1012d8d6524007b769b6c6ee
Signed-off-by: Neale Ranns <nranns@cisco.com>
54 files changed:
vnet/vnet/adj/adj.c
vnet/vnet/adj/adj.h
vnet/vnet/adj/adj_glean.c
vnet/vnet/adj/adj_glean.h
vnet/vnet/adj/adj_internal.h
vnet/vnet/adj/adj_l2.c
vnet/vnet/adj/adj_midchain.c
vnet/vnet/adj/adj_nbr.c
vnet/vnet/adj/adj_nbr.h
vnet/vnet/adj/adj_rewrite.c
vnet/vnet/adj/adj_types.h
vnet/vnet/dhcp/client.c
vnet/vnet/ethernet/arp.c
vnet/vnet/ethernet/ethernet.h
vnet/vnet/ethernet/interface.c
vnet/vnet/fib/fib_entry.c
vnet/vnet/fib/fib_path.c
vnet/vnet/fib/fib_test.c
vnet/vnet/fib/fib_types.h
vnet/vnet/gre/gre.c
vnet/vnet/gre/gre.h
vnet/vnet/gre/interface.c
vnet/vnet/hdlc/hdlc.c
vnet/vnet/interface.c
vnet/vnet/interface.h
vnet/vnet/interface_funcs.h
vnet/vnet/ip/format.h
vnet/vnet/ip/ip4_forward.c
vnet/vnet/ip/ip6.h
vnet/vnet/ip/ip6_forward.c
vnet/vnet/ip/ip6_neighbor.c
vnet/vnet/ipsec/ipsec_if.c
vnet/vnet/l2tp/l2tp.c
vnet/vnet/lisp-cp/lisp_types.c
vnet/vnet/lisp-cp/lisp_types.h
vnet/vnet/lisp-gpe/interface.c
vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
vnet/vnet/llc/llc.c
vnet/vnet/mpls/interface.c
vnet/vnet/mpls/mpls.h
vnet/vnet/mpls/mpls_output.c
vnet/vnet/pg/stream.c
vnet/vnet/ppp/ppp.c
vnet/vnet/replication.c
vnet/vnet/rewrite.c
vnet/vnet/rewrite.h
vnet/vnet/srp/interface.c
vnet/vnet/unix/tapcli.c
vnet/vnet/unix/tuntap.c
vnet/vnet/vxlan-gpe/vxlan_gpe.c
vnet/vnet/vxlan/vxlan.c
vpp/vpp-api/api.c

index 8f9d96e..0bdecc6 100644 (file)
@@ -92,43 +92,50 @@ adj_index_is_special (adj_index_t adj_index)
 u8 *
 format_ip_adjacency (u8 * s, va_list * args)
 {
-  vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
-  u32 adj_index = va_arg (*args, u32);
-  format_ip_adjacency_flags_t fiaf = va_arg (*args, format_ip_adjacency_flags_t);
-  ip_adjacency_t * adj = adj_get(adj_index);
+    format_ip_adjacency_flags_t fiaf;
+    ip_adjacency_t * adj;
+    u32 adj_index;
+
+    adj_index = va_arg (*args, u32);
+    fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+    adj = adj_get(adj_index);
   
-  switch (adj->lookup_next_index)
-  {
-  case IP_LOOKUP_NEXT_REWRITE:
-      s = format (s, "%U", format_adj_nbr, adj_index, 0);
-      break;
-  case IP_LOOKUP_NEXT_ARP:
-      s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
-      break;
-  case IP_LOOKUP_NEXT_GLEAN:
-      s = format (s, " %U",
-                 format_vnet_sw_interface_name,
-                 vnm,
-                 vnet_get_sw_interface(vnm,
-                                       adj->rewrite_header.sw_if_index));
-      break;
-
-  case IP_LOOKUP_NEXT_MIDCHAIN:
-      s = format (s, "%U", format_adj_midchain, adj_index, 2);
-      break;
-  default:
-      break;
-  }
-  s = format (s, " index:%d", adj_index);
-
-  if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
-  {
-      s = format (s, " locks:%d", adj->ia_node.fn_locks);
-      s = format(s, "\nchildren:\n ");
-      s = fib_node_children_format(adj->ia_node.fn_children, s);
-  }
-
-  return s;
+    switch (adj->lookup_next_index)
+    {
+    case IP_LOOKUP_NEXT_REWRITE:
+       s = format (s, "%U", format_adj_nbr, adj_index, 0);
+       break;
+    case IP_LOOKUP_NEXT_ARP:
+       s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+       break;
+    case IP_LOOKUP_NEXT_GLEAN:
+       s = format (s, "%U", format_adj_glean, adj_index, 0);
+       break;
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+       s = format (s, "%U", format_adj_midchain, adj_index, 2);
+       break;
+    default:
+       break;
+    }
+
+    if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+    {
+       s = format (s, "\n locks:%d", adj->ia_node.fn_locks);
+       s = format (s, " node:[%d]:%U",
+                   adj->rewrite_header.node_index,
+                   format_vlib_node_name, vlib_get_main(),
+                   adj->rewrite_header.node_index);
+       s = format (s, " next:[%d]:%U",
+                   adj->rewrite_header.next_index,
+                   format_vlib_next_node_name,
+                   vlib_get_main(),
+                   adj->rewrite_header.node_index,
+                   adj->rewrite_header.next_index);
+       s = format(s, "\n children:\n  ");
+       s = fib_node_children_format(adj->ia_node.fn_children, s);
+    }
+
+    return s;
 }
 
 /*
@@ -139,9 +146,13 @@ format_ip_adjacency (u8 * s, va_list * args)
 static void
 adj_last_lock_gone (ip_adjacency_t *adj)
 {
+    vlib_main_t * vm = vlib_get_main();
+
     ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
     ADJ_DBG(adj, "last-lock-gone");
 
+    vlib_worker_thread_barrier_sync (vm);
+
     switch (adj->lookup_next_index)
     {
     case IP_LOOKUP_NEXT_MIDCHAIN:
@@ -168,6 +179,8 @@ adj_last_lock_gone (ip_adjacency_t *adj)
        break;
     }
 
+    vlib_worker_thread_barrier_release(vm);
+
     fib_node_deinit(&adj->ia_node);
     pool_put(adj_pool, adj);
 }
@@ -239,6 +252,49 @@ adj_child_remove (adj_index_t adj_index,
                           sibling_index);
 }
 
+/**
+ * @brief Return the link type of the adjacency
+ */
+vnet_link_t
+adj_get_link_type (adj_index_t ai)
+{
+    const ip_adjacency_t *adj;
+
+    adj = adj_get(ai);
+
+    return (adj->ia_link); 
+}
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+u32
+adj_get_sw_if_index (adj_index_t ai)
+{
+    const ip_adjacency_t *adj;
+
+    adj = adj_get(ai);
+
+    return (adj->rewrite_header.sw_if_index);
+}
+
+/**
+ * @brief Return a pointer to the start of the adjacency's rewrite string
+ */
+const u8*
+adj_get_rewrite (adj_index_t ai)
+{
+    vnet_rewrite_header_t *rw;
+    ip_adjacency_t *adj;
+
+    adj = adj_get(ai);
+    rw = &adj->rewrite_header;
+
+    ASSERT (rw->data_bytes != 0xfefe);
+
+    return (rw->data - rw->data_bytes);
+}
+
 static fib_node_t *
 adj_get_node (fib_node_index_t index)
 {
@@ -289,7 +345,7 @@ adj_module_init (vlib_main_t * vm)
     adj_midchain_module_init();
 
     /*
-     * 4 special adjs for v4 and v6 resp.
+     * one special adj to reserve index 0
      */
     special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
 
@@ -298,10 +354,73 @@ adj_module_init (vlib_main_t * vm)
 
 VLIB_INIT_FUNCTION (adj_module_init);
 
+static clib_error_t *
+adj_show (vlib_main_t * vm,
+         unformat_input_t * input,
+         vlib_cli_command_t * cmd)
+{
+    adj_index_t ai = ADJ_INDEX_INVALID;
+    u32 sw_if_index = ~0;
+
+    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+       if (unformat (input, "%d", &ai))
+           ;
+       else if (unformat (input, "%U",
+                          unformat_vnet_sw_interface, vnet_get_main(),
+                          &sw_if_index))
+           ;
+       else
+           break;
+    }
+
+    if (ADJ_INDEX_INVALID != ai)
+    {
+       vlib_cli_output (vm, "[@%d] %U",
+                         ai,
+                         format_ip_adjacency,  ai,
+                        FORMAT_IP_ADJACENCY_DETAIL);
+    }
+    else
+    {
+       /* *INDENT-OFF* */
+       pool_foreach_index(ai, adj_pool,
+       ({
+           if (~0 != sw_if_index &&
+               sw_if_index == adj_get_sw_if_index(ai))
+           {
+               vlib_cli_output (vm, "[@%d] %U",
+                                ai,
+                                format_ip_adjacency, ai,
+                                FORMAT_IP_ADJACENCY_NONE);
+           }
+       }));
+       /* *INDENT-ON* */
+    }
+
+    return 0;
+}
+
+/*?
+ * Show all adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj}
+ * [@0]
+ * [@1]  glean: loop0
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_show_command, static) = {
+    .path = "show adj",
+    .short_help = "show adj [<adj_index>] [interface]",
+    .function = adj_show,
+};
+
 /* 
  * DEPRECATED: DO NOT USE
- *
- * Create new block of given number of contiguous adjacencies.
  */
 ip_adjacency_t *
 ip_add_adjacency (ip_lookup_main_t * lm,
index 002dab3..e85625d 100644 (file)
@@ -75,6 +75,28 @@ extern u32 adj_child_add(adj_index_t adj_index,
 extern void adj_child_remove(adj_index_t adj_index,
                             u32 sibling_index);
 
+/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+extern void adj_walk (u32 sw_if_index,
+                     adj_walk_cb_t cb,
+                     void *ctx);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern vnet_link_t adj_get_link_type (adj_index_t ai);
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+extern u32 adj_get_sw_if_index (adj_index_t ai);
+
+/**
+ * @brief Return a pointer to the start of the adjacency's rewrite string
+ */
+extern const u8* adj_get_rewrite (adj_index_t ai);
+
 /**
  * @brief
  * The global adjacnecy pool. Exposed for fast/inline data-plane access
index f5d1810..290af1f 100644 (file)
@@ -188,7 +188,8 @@ format_adj_glean (u8* s, va_list *ap)
     vnet_main_t * vnm = vnet_get_main();
     ip_adjacency_t * adj = adj_get(index);
 
-    return (format(s, " glean: %U",
+    return (format(s, "%U-glean: %U",
+                  format_fib_protocol, adj->ia_nh_proto,
                    format_vnet_sw_interface_name,
                    vnm,
                    vnet_get_sw_interface(vnm,
index ce3534e..640bd2f 100644 (file)
@@ -47,6 +47,11 @@ extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
                                         u32 sw_if_index,
                                         const ip46_address_t *nh_addr);
 
+/**
+ * @brief Format/display a glean adjacency.
+ */
+extern u8* format_adj_glean(u8* s, va_list *ap);
+
 /**
  * @brief
  *  Module initialisation
index 25a477a..f882bff 100644 (file)
 #define ADJ_DBG(_e, _fmt, _args...)
 #endif
 
-static inline vlib_node_registration_t*
+static inline u32
 adj_get_rewrite_node (fib_link_t linkt)
 {
     switch (linkt) {
     case FIB_LINK_IP4:
-       return (&ip4_rewrite_node);
+       return (ip4_rewrite_node.index);
     case FIB_LINK_IP6:
-       return (&ip6_rewrite_node);
+       return (ip6_rewrite_node.index);
     case FIB_LINK_MPLS:
-       return (&mpls_output_node);
+       return (mpls_output_node.index);
     case FIB_LINK_ETHERNET:
-       return (&adj_l2_rewrite_node);
+       return (adj_l2_rewrite_node.index);
     }
     ASSERT(0);
-    return (NULL);
+    return (0);
 }
 
 static inline vnet_l3_packet_type_t
@@ -75,17 +75,17 @@ adj_fib_link_2_vnet (fib_link_t linkt)
     return (0);
 }
 
-static inline vnet_l3_packet_type_t
+static inline vnet_link_t
 adj_fib_proto_2_nd (fib_protocol_t fp)
 {
     switch (fp)
     {
     case FIB_PROTOCOL_IP4:
-       return (VNET_L3_PACKET_TYPE_ARP);
+       return (VNET_LINK_ARP);
     case FIB_PROTOCOL_IP6:
-       return (VNET_L3_PACKET_TYPE_IP6);
+       return (VNET_LINK_IP6);
     case FIB_PROTOCOL_MPLS:
-       return (VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+       return (VNET_LINK_MPLS);
     }
     return (0);
 }
@@ -100,6 +100,12 @@ adj_get_index (ip_adjacency_t *adj)
     return (adj - adj_pool);
 }
 
+extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+                                            adj_nbr_rewrite_flag_t flags,
+                                            u32 complete_next_index,
+                                            u32 next_index,
+                                            u8 *rewrite);
+
 extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
 
 extern void adj_nbr_remove(fib_protocol_t nh_proto,
index cf0f044..2bb28a2 100644 (file)
@@ -32,11 +32,10 @@ format_adj_l2_trace (u8 * s, va_list * args)
     CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
     CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
     adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *);
-    vnet_main_t * vnm = vnet_get_main();
 
     s = format (s, "adj-idx %d : %U",
                t->adj_index,
-               format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+               format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
     return s;
 }
 
index c40d4e8..f42e3a9 100644 (file)
@@ -130,7 +130,7 @@ format_adj_midchain_tx_trace (u8 * s, va_list * args)
     adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
 
     s = format(s, "adj-midchain:[%d]:%U", tr->ai,
-              format_ip_adjacency, vnet_get_main(), tr->ai,
+              format_ip_adjacency, tr->ai,
               FORMAT_IP_ADJACENCY_NONE);
 
     return (s);
@@ -294,7 +294,17 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
     ASSERT(ADJ_INDEX_INVALID != adj_index);
 
     adj = adj_get(adj_index);
-    adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN;
+
+    /*
+     * one time only update. since we don't support changing the tunnel
+     * src,dst, this is all we need.
+     */
+    ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+    /*
+     * tunnels can always provide a rewrite.
+     */
+    ASSERT(NULL != rewrite);
+
     adj->sub_type.midchain.fixup_func = fixup;
 
     cm = adj_midchain_get_cofing_for_link_type(adj);
@@ -334,69 +344,26 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
 
     cm->config_index_by_sw_if_index[adj->rewrite_header.sw_if_index] = ci;
 
-    if (NULL != rewrite)
-    {
-       /*
-        * new rewrite provided.
-        * use a dummy rewrite header to get the interface to print into.
-        */
-       ip_adjacency_t dummy;
-       dpo_id_t tmp = DPO_NULL;
-
-       vnet_rewrite_for_tunnel(vnet_get_main(),
-                               adj->rewrite_header.sw_if_index,
-                               adj_get_midchain_node(adj->ia_link),
-                               adj->sub_type.midchain.tx_function_node,
-                               &dummy.rewrite_header,
-                               rewrite,
-                               vec_len(rewrite));
-
-       /*
-        * this is an update of an existing rewrite.
-        * packets are in flight. we'll need to briefly stack on the drop DPO
-        * whilst the rewrite is written, so any packets that see the partial update
-        * are binned.
-        */
-       if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo))
-       {
-           /*
-            * not stacked yet. stack on the drop
-            */
-           dpo_stack(DPO_ADJACENCY_MIDCHAIN,
-                     fib_link_to_dpo_proto(adj->ia_link),
-                     &adj->sub_type.midchain.next_dpo,
-                     drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
-       }
-
-       dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo);
-       dpo_stack(DPO_ADJACENCY_MIDCHAIN,
-                 fib_link_to_dpo_proto(adj->ia_link),
-                 &adj->sub_type.midchain.next_dpo,
-                 drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
-
-       CLIB_MEMORY_BARRIER();
 
-       clib_memcpy(&adj->rewrite_header,
-                   &dummy.rewrite_header,
-                   VLIB_BUFFER_PRE_DATA_SIZE);
-
-       CLIB_MEMORY_BARRIER();
+    /*
+     * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
+     * The graph arc used/created here is from the midchain-tx node to the
+     * child's registered node. This is because post adj processing the next
+     * node are any output features, then the midchain-tx.  from there we
+     * need to get to the stacked child's node.
+     */
+    dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
+                       &adj->sub_type.midchain.next_dpo,
+                       drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
 
-       /*
-        * The graph arc used/created here is from the midchain-tx node to the
-        * child's registered node. This is because post adj processing the next
-        * node are any output features, then the midchain-tx.  from there we
-        * need to get to the stacked child's node.
-        */
-       dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
-                           &adj->sub_type.midchain.next_dpo,
-                           &tmp);
-       dpo_reset(&tmp);
-    }
-    else
-    {
-       ASSERT(0);
-    }
+    /*
+     * update the rewrite with the workers paused.
+     */
+    adj_nbr_update_rewrite_internal(adj,
+                                   IP_LOOKUP_NEXT_MIDCHAIN,
+                                   adj_get_midchain_node(adj->ia_link),
+                                   adj->sub_type.midchain.tx_function_node,
+                                   rewrite);
 
     /*
      * time for walkies fido.
index 23e40a6..5351520 100644 (file)
@@ -115,152 +115,19 @@ adj_nbr_find (fib_protocol_t nh_proto,
     }
 }
 
-static inline vlib_node_registration_t*
+static inline u32
 adj_get_nd_node (fib_protocol_t proto)
 {
     switch (proto) {
     case FIB_PROTOCOL_IP4:
-       return (&ip4_arp_node);
+       return (ip4_arp_node.index);
     case FIB_PROTOCOL_IP6:
-       return (&ip6_discover_neighbor_node);
+       return (ip6_discover_neighbor_node.index);
     case FIB_PROTOCOL_MPLS:
        break;
     }
     ASSERT(0);
-    return (NULL);
-}
-
-static void
-adj_ip4_nbr_probe (ip_adjacency_t *adj)
-{
-    vnet_main_t * vnm = vnet_get_main();
-    ip4_main_t * im = &ip4_main;
-    ip_interface_address_t * ia;
-    ethernet_arp_header_t * h;
-    vnet_hw_interface_t * hi;
-    vnet_sw_interface_t * si;
-    ip4_address_t * src;
-    vlib_buffer_t * b;
-    vlib_main_t * vm;
-    u32 bi = 0;
-
-    vm = vlib_get_main();
-
-    si = vnet_get_sw_interface (vnm,
-                               adj->rewrite_header.sw_if_index);
-
-    if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
-    {
-        return;
-    }
-
-    src =
-      ip4_interface_address_matching_destination(im,
-                                                &adj->sub_type.nbr.next_hop.ip4,
-                                                adj->rewrite_header.sw_if_index,
-                                                &ia);
-    if (! src)
-    {
-        return;
-    }
-
-    h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
-
-    hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
-
-    clib_memcpy (h->ip4_over_ethernet[0].ethernet,
-                hi->hw_address,
-                sizeof (h->ip4_over_ethernet[0].ethernet));
-
-    h->ip4_over_ethernet[0].ip4 = src[0];
-    h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
-
-    b = vlib_get_buffer (vm, bi);
-    vnet_buffer (b)->sw_if_index[VLIB_RX] =
-      vnet_buffer (b)->sw_if_index[VLIB_TX] =
-          adj->rewrite_header.sw_if_index;
-
-    /* Add encapsulation string for software interface (e.g. ethernet header). */
-    vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
-    vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
-
-    {
-        vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
-       u32 * to_next = vlib_frame_vector_args (f);
-       to_next[0] = bi;
-       f->n_vectors = 1;
-       vlib_put_frame_to_node (vm, hi->output_node_index, f);
-    }
-}
-
-static void
-adj_ip6_nbr_probe (ip_adjacency_t *adj)
-{
-    icmp6_neighbor_solicitation_header_t * h;
-    vnet_main_t * vnm = vnet_get_main();
-    ip6_main_t * im = &ip6_main;
-    ip_interface_address_t * ia;
-    ip6_address_t * dst, *src;
-    vnet_hw_interface_t * hi;
-    vnet_sw_interface_t * si;
-    vlib_buffer_t * b;
-    int bogus_length;
-    vlib_main_t * vm;
-    u32 bi = 0;
-
-    vm = vlib_get_main();
-
-    si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
-    dst = &adj->sub_type.nbr.next_hop.ip6;
-
-    if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
-    {
-        return;
-    }
-    src = ip6_interface_address_matching_destination(im, dst,
-                                                    adj->rewrite_header.sw_if_index,
-                                                    &ia);
-    if (! src)
-    {
-       return;
-    }
-
-    h = vlib_packet_template_get_packet(vm,
-                                       &im->discover_neighbor_packet_template,
-                                       &bi);
-
-    hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
-
-    h->ip.dst_address.as_u8[13] = dst->as_u8[13];
-    h->ip.dst_address.as_u8[14] = dst->as_u8[14];
-    h->ip.dst_address.as_u8[15] = dst->as_u8[15];
-    h->ip.src_address = src[0];
-    h->neighbor.target_address = dst[0];
-
-    clib_memcpy (h->link_layer_option.ethernet_address,
-                hi->hw_address,
-                vec_len(hi->hw_address));
-
-    h->neighbor.icmp.checksum = 
-       ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
-    ASSERT(bogus_length == 0);
-
-    b = vlib_get_buffer (vm, bi);
-    vnet_buffer (b)->sw_if_index[VLIB_RX] =
-       vnet_buffer (b)->sw_if_index[VLIB_TX] =
-          adj->rewrite_header.sw_if_index;
-
-    /* Add encapsulation string for software interface (e.g. ethernet header). */
-    vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
-    vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
-
-    {
-       vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
-       u32 * to_next = vlib_frame_vector_args(f);
-       to_next[0] = bi;
-       f->n_vectors = 1;
-       vlib_put_frame_to_node(vm, hi->output_node_index, f);
-    }
+    return (ip4_arp_node.index);
 }
 
 static ip_adjacency_t*
@@ -285,6 +152,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
     adj->sub_type.nbr.next_hop = *nh_addr;
     adj->ia_link = link_type;
     adj->ia_nh_proto = nh_proto;
+    adj->rewrite_header.sw_if_index = sw_if_index;
     memset(&adj->sub_type.midchain.next_dpo, 0,
            sizeof(adj->sub_type.midchain.next_dpo));
 
@@ -300,8 +168,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
  *   - the Next-hops protocol (i.e. v4 or v6)
  *   - the address of the next-hop
  *   - the interface the next-hop is reachable through
- *   - fib_index; this is broken. i will fix it.
- *     the adj lookup currently occurs in the FIB.
  */
 adj_index_t
 adj_nbr_add_or_lock (fib_protocol_t nh_proto,
@@ -316,55 +182,31 @@ adj_nbr_add_or_lock (fib_protocol_t nh_proto,
 
     if (ADJ_INDEX_INVALID == adj_index)
     {
+       vnet_main_t *vnm;
+
+       vnm = vnet_get_main();
        adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+       adj_index = adj_get_index(adj);
+       adj_lock(adj_index);
+
+       vnet_rewrite_init(vnm, sw_if_index,
+                         adj_get_nd_node(nh_proto),
+                         vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+                         &adj->rewrite_header);
 
        /*
-        * If there is no next-hop, this is the 'auto-adj' used on p2p
-        * links instead of a glean.
+        * we need a rewrite where the destination IP address is converted
+        * to the appropriate link-layer address. This is interface specific.
+        * So ask the interface to do it.
         */
-       if (ip46_address_is_zero(nh_addr))
-       {
-           adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
-
-           vnet_rewrite_for_sw_interface(vnet_get_main(),
-                                         adj_fib_link_2_vnet(link_type),
-                                         sw_if_index,
-                                         adj_get_rewrite_node(link_type)->index,
-                                         VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
-                                         &adj->rewrite_header,
-                                         sizeof (adj->rewrite_data));
-       }
-       else
-       {
-           vnet_rewrite_for_sw_interface(vnet_get_main(),
-                                         adj_fib_proto_2_nd(nh_proto),
-                                         sw_if_index,
-                                         adj_get_nd_node(nh_proto)->index,
-                                         VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
-                                         &adj->rewrite_header,
-                                         sizeof (adj->rewrite_data));
-
-           switch (nh_proto)
-           {
-           case FIB_PROTOCOL_IP4:
-               adj_ip4_nbr_probe(adj);
-               break;
-           case FIB_PROTOCOL_IP6:
-               adj_ip6_nbr_probe(adj);
-               break;
-           case FIB_PROTOCOL_MPLS:
-               break;
-           }
-       }
+       vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
     }
     else
     {
-       adj = adj_get(adj_index);
+       adj_lock(adj_index);
     }
 
-    adj_lock(adj_get_index(adj));
-
-    return (adj_get_index(adj));
+    return (adj_index);
 }
 
 adj_index_t
@@ -390,7 +232,9 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
     }
 
     adj_lock(adj_get_index(adj));
-    adj_nbr_update_rewrite(adj_get_index(adj), rewrite);
+    adj_nbr_update_rewrite(adj_get_index(adj),
+                          ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          rewrite);
 
     return (adj_get_index(adj));
 }
@@ -404,86 +248,134 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
  */
 void
 adj_nbr_update_rewrite (adj_index_t adj_index,
+                       adj_nbr_rewrite_flag_t flags,
                        u8 *rewrite)
 {
     ip_adjacency_t *adj;
+    u32 old_next;
 
     ASSERT(ADJ_INDEX_INVALID != adj_index);
 
     adj = adj_get(adj_index);
+    old_next = adj->lookup_next_index;
 
-    if (NULL != rewrite)
+    if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
     {
        /*
-        * new rewrite provided.
-        * use a dummy rewrite header to get the interface to print into.
-        */
-       ip_adjacency_t dummy;
-
-       vnet_rewrite_for_sw_interface(vnet_get_main(),
-                                     adj_fib_link_2_vnet(adj->ia_link),
-                                     adj->rewrite_header.sw_if_index,
-                                     adj_get_rewrite_node(adj->ia_link)->index,
-                                     rewrite,
-                                     &dummy.rewrite_header,
-                                     sizeof (dummy.rewrite_data));
-
-       if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
-       {
-           /*
-            * this is an update of an existing rewrite.
-            * we can't just paste in the new rewrite as that is not atomic.
-            * So we briefly swap the ADJ to ARP type, paste, then swap back.
-            */
-           adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
-           CLIB_MEMORY_BARRIER();
-       }
-       /*
-        * else
-        *   this is the first time the rewrite is added.
-        *   paste it on then swap the next type.
+        * update the adj's rewrite string and build the arc
+        * from the rewrite node to the interface's TX node
         */
-       clib_memcpy(&adj->rewrite_header,
-                   &dummy.rewrite_header,
-                   VLIB_BUFFER_PRE_DATA_SIZE);
-
-       adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+       adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
+                                       adj_get_rewrite_node(adj->ia_link),
+                                       vnet_tx_node_index_for_sw_interface(
+                                           vnet_get_main(),
+                                           adj->rewrite_header.sw_if_index),
+                                       rewrite);
     }
     else
+    {
+       adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
+                                       adj_get_nd_node(adj->ia_nh_proto),
+                                       vnet_tx_node_index_for_sw_interface(
+                                           vnet_get_main(),
+                                           adj->rewrite_header.sw_if_index),
+                                       rewrite);
+    }
+
+    if (old_next != adj->lookup_next_index)
     {
        /*
-        * clear the rewrite.
+        * time for walkies fido.
+        * The link type MPLS Adj never has children. So if it is this adj
+        * that is updated, we need to walk from its IP sibling.
         */
-       adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
-       CLIB_MEMORY_BARRIER();
+       if (FIB_LINK_MPLS == adj->ia_link)
+       {
+           adj_index = adj_nbr_find(adj->ia_nh_proto,
+                                    fib_proto_to_link(adj->ia_nh_proto),
+                                    &adj->sub_type.nbr.next_hop,
+                                    adj->rewrite_header.sw_if_index);
+
+           ASSERT(ADJ_INDEX_INVALID != adj_index);
+       }
 
-       adj->rewrite_header.data_bytes = 0;
+       fib_node_back_walk_ctx_t bw_ctx = {
+           .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+           /*
+            * This walk only needs to go back one level, but there is no control
+            * here. the first receiving fib_entry_t will quash the walk
+            */
+       };
+
+       fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
     }
+}
+
+/**
+ * adj_nbr_update_rewrite_internal
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when the ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+                                u32 adj_next_index,
+                                u32 this_node,
+                                u32 next_node,
+                                u8 *rewrite)
+{
+    vlib_main_t * vm = vlib_get_main();
 
     /*
-     * time for walkies fido.
-     * The link type MPLS Adj never has children. So if it is this adj
-     * that is updated, we need to walk from its IP sibling.
+     * Updating a rewrite string is not atomic;
+     *  - the rewrite string is too long to write in one instruction
+     *  - when swapping from incomplete to complete, we also need to update
+     *    the VLIB graph next-index.
+     * ideally we would only want to suspend forwarding via this adj whilst we
+     * do this, but we do not have that level of granularity - it's suspend all
+     * worker threads or nothing.
+     * The other choices are:
+     *  - to mark the adj down and back walk so child load-balances drop this adj
+     *    from the set.
+     *  - update the next_node index of this adj to point to error-drop
+     * both of which will mean for MAC change we will drop for this adj
+     * which is not acceptable.
+     * So the pause all threads is preferable. We don't update MAC addresses often
+     * so it's no big deal.
      */
-    if (FIB_LINK_MPLS == adj->ia_link)
-    {
-        adj_index = adj_nbr_find(adj->ia_nh_proto,
-                                fib_proto_to_link(adj->ia_nh_proto),
-                                &adj->sub_type.nbr.next_hop,
-                                adj->rewrite_header.sw_if_index);
+    vlib_worker_thread_barrier_sync(vm);
 
-        ASSERT(ADJ_INDEX_INVALID != adj_index);
-    }
+    adj->lookup_next_index = adj_next_index;
 
-    fib_node_back_walk_ctx_t bw_ctx = {
-       .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+    if (NULL != rewrite)
+    {
        /*
-        * This walk only needs to go back one level, but there is no control here.
-         * the first receiving fib_entry_t will quash the walk
+        * new rewrite provided.
+        * fill in the adj's rewrite string, and build the VLIB graph arc.
         */
-    };
+       vnet_rewrite_set_data_internal(&adj->rewrite_header,
+                                      sizeof(adj->rewrite_data),
+                                      rewrite,
+                                      vec_len(rewrite));
 
-    fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
+       adj->rewrite_header.node_index = this_node;
+       adj->rewrite_header.next_index = vlib_node_add_next (vlib_get_main(),
+                                                            this_node,
+                                                            next_node);
+
+       vec_free(rewrite);
+    }
+    else
+    {
+       vnet_rewrite_clear_data_internal(&adj->rewrite_header,
+                                        sizeof(adj->rewrite_data));
+    }
+
+    /*
+     * done with the rewrite update - let the workers loose.
+     */
+    vlib_worker_thread_barrier_release(vm);
 }
 
 typedef struct adj_db_count_ctx_t_ {
@@ -523,6 +415,152 @@ adj_nbr_db_size (void)
     return (ctx.count);
 }
 
+/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_ctx_t_
+{
+    /** The client's callback, invoked with each adjacency index visited */
+    adj_walk_cb_t awc_cb;
+    /** The client's opaque context, handed back unmodified to awc_cb */
+    void *awc_ctx;
+    /**
+     * Latched to non-zero once the client's callback has returned
+     * ADJ_WALK_RC_STOP. A designated initialiser that does not name this
+     * member leaves it zero, i.e. 'keep walking'.
+     */
+    int awc_stopped;
+} adj_walk_ctx_t;
+
+/**
+ * @brief Per key/value-pair callback for the hash table walk; passes the
+ * adjacency index stored in the pair's value to the client's callback.
+ *
+ * This callback returns void, so the table walk itself cannot be aborted
+ * from here. To honour ADJ_WALK_RC_STOP the request is remembered in the
+ * context and the client is not called for any of the remaining pairs.
+ */
+static void
+adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
+                void *arg)
+{
+    adj_walk_ctx_t *ctx = arg;
+
+    if (ctx->awc_stopped)
+        return;
+
+    if (ADJ_WALK_RC_STOP == ctx->awc_cb(kvp->value, ctx->awc_ctx))
+        ctx->awc_stopped = 1;
+}
+
+/*
+ * Invoke cb(adj-index, ctx) for each adjacency stored in the neighbour
+ * table of the given next-hop protocol and interface. Does nothing when
+ * ADJ_NBR_ITF_OK() rejects the protocol/interface pair.
+ */
+void
+adj_nbr_walk (u32 sw_if_index,
+             fib_protocol_t adj_nh_proto,
+             adj_walk_cb_t cb,
+             void *ctx)
+{
+    if (ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+    {
+        adj_walk_ctx_t walk = {
+            .awc_cb = cb,
+            .awc_ctx = ctx,
+        };
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[adj_nh_proto][sw_if_index],
+            adj_nbr_walk_cb,
+            &walk);
+    }
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB that is
+ * restricted to the adjacencies with one particular next-hop
+ */
+typedef struct adj_walk_nh_ctx_t_
+{
+    /** The client's callback, invoked with each matching adjacency index */
+    adj_walk_cb_t awc_cb;
+    /** The client's opaque context, handed back unmodified to awc_cb */
+    void *awc_ctx;
+    /** The next-hop an adjacency must have to be passed to the client */
+    const ip46_address_t *awc_nh;
+    /**
+     * Latched to non-zero once the client's callback has returned
+     * ADJ_WALK_RC_STOP. A designated initialiser that does not name this
+     * member leaves it zero, i.e. 'keep walking'.
+     */
+    int awc_stopped;
+} adj_walk_nh_ctx_t;
+
+/**
+ * @brief Per key/value-pair callback for the hash table walk; passes the
+ * adjacency index to the client only when ip46_address_cmp() returns 0 for
+ * the adjacency's next-hop against the one in the context.
+ *
+ * This callback returns void, so the table walk itself cannot be aborted
+ * from here. To honour ADJ_WALK_RC_STOP the request is remembered in the
+ * context and the client is not called for any of the remaining pairs.
+ */
+static void
+adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
+                   void *arg)
+{
+    adj_walk_nh_ctx_t *ctx = arg;
+    ip_adjacency_t *adj;
+
+    if (ctx->awc_stopped)
+        return;
+
+    adj = adj_get(kvp->value);
+
+    if (ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
+        return;
+
+    if (ADJ_WALK_RC_STOP == ctx->awc_cb(kvp->value, ctx->awc_ctx))
+        ctx->awc_stopped = 1;
+}
+
+/**
+ * @brief Visit the adjacencies on an interface that have the given IPv4
+ * next-hop, i.e. the adjacencies to that peer for the different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+                const ip4_address_t *addr,
+                adj_walk_cb_t cb,
+                void *ctx)
+{
+    if (ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
+    {
+        /* the adjacency stores its next-hop as an ip46_address_t */
+        ip46_address_t nh46 = {
+            .ip4 = *addr,
+        };
+        adj_walk_nh_ctx_t walk = {
+            .awc_cb = cb,
+            .awc_ctx = ctx,
+            .awc_nh = &nh46,
+        };
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
+            adj_nbr_walk_nh_cb,
+            &walk);
+    }
+}
+
+/**
+ * @brief Visit the adjacencies on an interface that have the given IPv6
+ * next-hop, i.e. the adjacencies to that peer for the different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+                const ip6_address_t *addr,
+                adj_walk_cb_t cb,
+                void *ctx)
+{
+    if (ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
+    {
+        /* the adjacency stores its next-hop as an ip46_address_t */
+        ip46_address_t nh46 = {
+            .ip6 = *addr,
+        };
+        adj_walk_nh_ctx_t walk = {
+            .awc_cb = cb,
+            .awc_ctx = ctx,
+            .awc_nh = &nh46,
+        };
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
+            adj_nbr_walk_nh_cb,
+            &walk);
+    }
+}
+
+/**
+ * @brief Visit the adjacencies on an interface that have the given
+ * next-hop, i.e. the adjacencies to that peer for the different link types.
+ * The table walked is selected by the next-hop protocol and the interface.
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+                fib_protocol_t adj_nh_proto,
+                const ip46_address_t *nh,
+                adj_walk_cb_t cb,
+                void *ctx)
+{
+    if (ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+    {
+        adj_walk_nh_ctx_t walk = {
+            .awc_cb = cb,
+            .awc_ctx = ctx,
+            .awc_nh = nh,
+        };
+
+        BV(clib_bihash_foreach_key_value_pair) (
+            adj_nbr_tables[adj_nh_proto][sw_if_index],
+            adj_nbr_walk_nh_cb,
+            &walk);
+    }
+}
+
 /**
  * Context for the state change walk of the DB
  */
@@ -534,8 +572,8 @@ typedef struct adj_nbr_interface_state_change_ctx_t_
     int flags;
 } adj_nbr_interface_state_change_ctx_t;
 
-static void
-adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_interface_state_change_one (adj_index_t ai,
                                    void *arg)
 {
     /*
@@ -550,7 +588,9 @@ adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
                        FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
     };
 
-    fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+    return (ADJ_WALK_RC_CONTINUE);
 }
 
 static clib_error_t *
@@ -565,17 +605,13 @@ adj_nbr_interface_state_change (vnet_main_t * vnm,
      */
     for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
     {
-       if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
-           continue;
-
        adj_nbr_interface_state_change_ctx_t ctx = {
            .flags = flags,
        };
 
-       BV(clib_bihash_foreach_key_value_pair) (
-           adj_nbr_tables[proto][sw_if_index],
-           adj_nbr_interface_state_change_one,
-           &ctx);
+       adj_nbr_walk(sw_if_index, proto,
+                    adj_nbr_interface_state_change_one,
+                    &ctx);
     }
 
     return (NULL);
@@ -583,8 +619,8 @@ adj_nbr_interface_state_change (vnet_main_t * vnm,
 
 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change);
 
-static void
-adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_interface_delete_one (adj_index_t ai,
                              void *arg)
 {
     /*
@@ -595,7 +631,9 @@ adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
        .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
     };
 
-    fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
+    fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+    return (ADJ_WALK_RC_CONTINUE);
 }
 
 /**
@@ -630,13 +668,9 @@ adj_nbr_interface_add_del (vnet_main_t * vnm,
 
     for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
     {
-       if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
-           continue;
-
-       BV(clib_bihash_foreach_key_value_pair) (
-           adj_nbr_tables[proto][sw_if_index],
-           adj_nbr_interface_delete_one,
-           NULL);
+       adj_nbr_walk(sw_if_index, proto,
+                    adj_nbr_interface_delete_one,
+                    NULL);
     }
 
     return (NULL);
@@ -646,15 +680,16 @@ adj_nbr_interface_add_del (vnet_main_t * vnm,
 VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
 
 
+/*
+ * Adjacency walk callback for the 'show adj nbr' CLI: prints one line for
+ * the adjacency 'ai'. 'arg' is passed straight to vlib_cli_output(); the
+ * show command supplies its vlib_main_t there. Always continues the walk.
+ */
-static void
-adj_nbr_show_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_show_one (adj_index_t ai,
                   void *arg)
 {
     vlib_cli_output (arg, "[@%d]  %U",
-                     kvp->value,
-                     format_ip_adjacency,
-                     vnet_get_main(), kvp->value,
+                     ai,
+                     format_ip_adjacency, ai,
                     FORMAT_IP_ADJACENCY_NONE);
+
+    return (ADJ_WALK_RC_CONTINUE);
 }
 
 static clib_error_t *
@@ -663,11 +698,16 @@ adj_nbr_show (vlib_main_t * vm,
              vlib_cli_command_t * cmd)
 {
     adj_index_t ai = ADJ_INDEX_INVALID;
+    u32 sw_if_index = ~0;
 
     while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
        if (unformat (input, "%d", &ai))
            ;
+       else if (unformat (input, "%U",
+                          unformat_vnet_sw_interface, vnet_get_main(),
+                          &sw_if_index))
+           ;
        else
            break;
     }
@@ -676,28 +716,31 @@ adj_nbr_show (vlib_main_t * vm,
     {
        vlib_cli_output (vm, "[@%d] %U",
                          ai,
-
-                         format_ip_adjacency,
-                        vnet_get_main(), ai,
+                         format_ip_adjacency, ai,
                         FORMAT_IP_ADJACENCY_DETAIL);
     }
-    else
+    else if (~0 != sw_if_index)
     {
        fib_protocol_t proto;
 
        for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
        {
-           u32 sw_if_index;
+           adj_nbr_walk(sw_if_index, proto,
+                        adj_nbr_show_one,
+                        vm);
+       }
+    }
+    else
+    {
+       fib_protocol_t proto;
 
+       for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+       {
            vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
            {
-               if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
-                   continue;
-
-               BV(clib_bihash_foreach_key_value_pair) (
-                   adj_nbr_tables[proto][sw_if_index],
-                   adj_nbr_show_one,
-                   vm);
+               adj_nbr_walk(sw_if_index, proto,
+                            adj_nbr_show_one,
+                            vm);
            }
        }
     }
@@ -705,12 +748,37 @@ adj_nbr_show (vlib_main_t * vm,
     return 0;
 }
 
+/*?
+ * Show all neighbour adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj nbr}
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
 VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
     .path = "show adj nbr",
-    .short_help = "show adj nbr [<adj_index>] [sw_if_index <index>]",
+    .short_help = "show adj nbr [<adj_index>] [interface]",
     .function = adj_nbr_show,
 };
 
+/*
+ * Map an adjacency's next-hop protocol to the ip46 address type used when
+ * formatting its next-hop. Only FIB_PROTOCOL_IP6 selects the v6 type; every
+ * other protocol value is formatted as v4.
+ */
+static ip46_type_t
+adj_proto_to_46 (fib_protocol_t proto)
+{
+    return ((FIB_PROTOCOL_IP6 == proto) ?
+            IP46_TYPE_IP6 :
+            IP46_TYPE_IP4);
+}
+
 u8*
 format_adj_nbr_incomplete (u8* s, va_list *ap)
 {
@@ -721,7 +789,8 @@ format_adj_nbr_incomplete (u8* s, va_list *ap)
 
     s = format (s, "arp-%U", format_fib_link, adj->ia_link);
     s = format (s, ": via %U",
-                format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY);
+                format_ip46_address, &adj->sub_type.nbr.next_hop,
+               adj_proto_to_46(adj->ia_nh_proto));
     s = format (s, " %U",
                 format_vnet_sw_interface_name,
                 vnm,
@@ -741,7 +810,8 @@ format_adj_nbr (u8* s, va_list *ap)
 
     s = format (s, "%U", format_fib_link, adj->ia_link);
     s = format (s, " via %U ",
-               format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY);
+               format_ip46_address, &adj->sub_type.nbr.next_hop,
+               adj_proto_to_46(adj->ia_nh_proto));
     s = format (s, "%U",
                format_vnet_rewrite,
                vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
index 331423b..39663b6 100644 (file)
@@ -75,6 +75,28 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
                                                 const ip46_address_t *nh_addr,
                                                 u32 sw_if_index,
                                                 u8 *rewrite);
+/**
+ * @brief When adding a rewrite to an adjacency these are flags that
+ * apply to that rewrite
+ */
+typedef enum adj_nbr_rewrite_flag_t_
+{
+    /**
+     * No flags set.
+     */
+    ADJ_NBR_REWRITE_FLAG_NONE,
+
+    /**
+     * An indication that the rewrite is incomplete, i.e. that it describes the
+     * ARP/ND rewrite when probing.
+     * This is an alias for 'no flags'; it is the absence of the COMPLETE flag.
+     */
+    ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE,
+
+    /**
+     * An indication that the rewrite is complete, i.e. that it fully describes
+     * the link-layer addressing for the destination.
+     * The opposite of this is an incomplete rewrite that describes the ARP/ND
+     * rewrite when probing.
+     */
+    ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0),
+} adj_nbr_rewrite_flag_t;
 
 /**
  * @brief
@@ -87,6 +109,7 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
  *  The new rewrite
  */
 extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+                                  adj_nbr_rewrite_flag_t flags,
                                   u8 *rewrite);
 
 /**
@@ -101,6 +124,43 @@ extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
  */
 extern u8* format_adj_nbr(u8* s, va_list *ap);
 
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface
+ *
+ * @param sw_if_index The interface whose adjacencies are walked
+ * @param adj_nh_proto The next-hop protocol of the adjacencies to walk
+ * @param cb Function called with the index of each adjacency visited
+ * @param ctx Opaque context passed unmodified to cb
+ */
+extern void adj_nbr_walk (u32 sw_if_index,
+                         fib_protocol_t adj_nh_proto,
+                         adj_walk_cb_t cb,
+                         void *ctx);
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop
+ *
+ * @param sw_if_index The interface whose adjacencies are walked
+ * @param adj_nh_proto The next-hop protocol of the adjacencies to walk
+ * @param nh Only adjacencies with this next-hop are passed to cb
+ * @param cb Function called with the index of each matching adjacency
+ * @param ctx Opaque context passed unmodified to cb
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+                fib_protocol_t adj_nh_proto,
+                const ip46_address_t *nh,
+                adj_walk_cb_t cb,
+                void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ *
+ * @param sw_if_index The interface whose adjacencies are walked
+ * @param addr Only adjacencies with this IPv4 next-hop are passed to cb
+ * @param cb Function called with the index of each matching adjacency
+ * @param ctx Opaque context passed unmodified to cb
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+                 const ip4_address_t *addr,
+                 adj_walk_cb_t cb,
+                 void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ *
+ * @param sw_if_index The interface whose adjacencies are walked
+ * @param addr Only adjacencies with this IPv6 next-hop are passed to cb
+ * @param cb Function called with the index of each matching adjacency
+ * @param ctx Opaque context passed unmodified to cb
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+                 const ip6_address_t *addr,
+                 adj_walk_cb_t cb,
+                 void *ctx);
+
+
 /**
  * @brief
  *  Module initialisation
index eb93f6a..046fff4 100644 (file)
@@ -32,15 +32,17 @@ adj_rewrite_add_and_lock (fib_protocol_t nh_proto,
     adj = adj_alloc(nh_proto);
 
     adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+    memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop));
     adj->ia_link = link_type;
+    adj->ia_nh_proto = nh_proto;
     adj->rewrite_header.sw_if_index = sw_if_index;
 
     ASSERT(NULL != rewrite);
 
     vnet_rewrite_for_sw_interface(vnet_get_main(),
-                                 adj_fib_link_2_vnet(link_type),
+                                 link_type,
                                  adj->rewrite_header.sw_if_index,
-                                 adj_get_rewrite_node(link_type)->index,
+                                 adj_get_rewrite_node(link_type),
                                  rewrite,
                                  &adj->rewrite_header,
                                  sizeof (adj->rewrite_data));
index a723466..cf90c08 100644 (file)
@@ -35,4 +35,19 @@ typedef u32 adj_index_t;
  */
 #define ADJ_INDEX_INVALID ((u32)~0)
 
+/**
+ * @brief return codes from an adjacency walker callback function
+ */
+typedef enum adj_walk_rc_t_
+{
+    /** The callback asks that no further adjacencies be visited */
+    ADJ_WALK_RC_STOP,
+    /** The callback asks that the walk carry on to the next adjacency */
+    ADJ_WALK_RC_CONTINUE,
+} adj_walk_rc_t;
+
+/**
+ * @brief Call back function when walking adjacencies
+ *
+ * @param ai The index of the adjacency being visited
+ * @param ctx The opaque context the client supplied when starting the walk
+ * @return whether the walk should continue or stop
+ */
+typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai,
+                                      void *ctx);
+
+
 #endif
index ffe6e8d..f555f19 100644 (file)
@@ -44,23 +44,15 @@ dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
                                  c->subnet_mask_width, 1 /*is_del*/);
 }
 
-static void set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
+static void
+set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
 {
-  vnet_main_t * vnm = dcm->vnet_main;
-  vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index);
-  vnet_hw_interface_class_t * hc = 
-    vnet_get_hw_interface_class (vnm, hw->hw_class_index);
-  u32 n_rw;
-
   /* Acquire the L2 rewrite string for the indicated sw_if_index */
-  vec_validate (c->l2_rewrite, 32);
-  ASSERT (hc->set_rewrite);
-  n_rw = hc->set_rewrite (dcm->vnet_main, c->sw_if_index, 
-                          VNET_L3_PACKET_TYPE_IP4,
-                          0 /* broadcast */, c->l2_rewrite, 
-                          vec_len(c->l2_rewrite));
-                          
-  _vec_len (c->l2_rewrite) = n_rw;
+  /*
+   * The rewrite is built for IPv4 packets on the client's interface and
+   * stored on the client.
+   * NOTE(review): any value already held in c->l2_rewrite is overwritten
+   * here without being freed - confirm callers invoke this only once per
+   * client, or release the old rewrite first.
+   */
+  c->l2_rewrite = vnet_build_rewrite_for_sw_interface(
+                      dcm->vnet_main,
+                     c->sw_if_index, 
+                     VNET_LINK_IP4,
+                     0 /* broadcast */);
 }
 
 /* 
index 645ff86..eeaac4d 100644 (file)
@@ -22,7 +22,7 @@
 #include <vnet/l2/l2_input.h>
 #include <vppinfra/mhash.h>
 #include <vnet/fib/ip4_fib.h>
-#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_nbr.h>
 #include <vnet/mpls/mpls.h>
 
 /**
@@ -48,33 +48,23 @@ typedef struct
 #define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1)
 
   u64 cpu_time_last_updated;
-  adj_index_t adj_index[FIB_LINK_NUM];
-} ethernet_arp_ip4_entry_t;
 
-/**
- * @brief administrative and operational state falgs on an interface
- */
-typedef enum ethernet_arp_interface_flags_t_
-{
-  ETHERNET_ARP_INTERFACE_UP = (0 << 1),
-  ETHERNET_ARP_INTERFACE_MPLS_ENABLE = (1 << 0),
-} ethernet_arp_interface_flags_t;
+  /**
+   * The index of the adj-fib entry created
+   */
+  fib_node_index_t fib_entry_index;
+} ethernet_arp_ip4_entry_t;
 
 /**
  * @brief Per-interface ARP configuration and state
  */
 typedef struct ethernet_arp_interface_t_
 {
-    /**
-     * Hash table of ARP entries.
-     * Since this hash table is per-interface, the key is only the IPv4 address.
-     */
+  /**
+   * Hash table of ARP entries.
+   * Since this hash table is per-interface, the key is only the IPv4 address.
+   */
   uword *arp_entries;
-
-    /**
-     * Flags for administrative and operational state
-     */
-  ethernet_arp_interface_flags_t flags;
 } ethernet_arp_interface_t;
 
 typedef struct
@@ -123,14 +113,6 @@ typedef struct
 
 static ethernet_arp_main_t ethernet_arp_main;
 
-
-typedef enum arp_ether_type_t_
-{
-  ARP_ETHER_TYPE_IP4 = (1 << 0),
-  ARP_ETHER_TYPE_MPLS = (1 << 1),
-} arp_ether_type_t;
-#define ARP_ETHER_TYPE_BOTH (ARP_ETHER_TYPE_MPLS | ARP_ETHER_TYPE_IP4)
-
 typedef struct
 {
   u32 sw_if_index;
@@ -140,7 +122,6 @@ typedef struct
 #define ETHERNET_ARP_ARGS_REMOVE (1<<0)
 #define ETHERNET_ARP_ARGS_FLUSH  (1<<1)
 #define ETHERNET_ARP_ARGS_POPULATE  (1<<2)
-  arp_ether_type_t ether_type;
 } vnet_arp_set_ip4_over_ethernet_rpc_args_t;
 
 static void
@@ -339,68 +320,170 @@ format_arp_term_input_trace (u8 * s, va_list * va)
 }
 
+/*
+ * Send a single ARP request for the adjacency's IPv4 next-hop out of the
+ * adjacency's interface. The request is sourced from the interface address
+ * that matches the next-hop and is encapsulated with the rewrite currently
+ * held by the adjacency. Nothing is sent if the interface is not admin up,
+ * if no matching source address exists, or if no packet buffer could be
+ * obtained for the request.
+ */
 static void
-arp_mk_complete (ethernet_arp_interface_t * eai,
-                ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
+arp_nbr_probe (ip_adjacency_t * adj)
 {
-  fib_prefix_t pfx = {
-    .fp_len = 32,
-    .fp_proto = FIB_PROTOCOL_IP4,
-    .fp_addr = {
-               .ip4 = e->ip4_address,
-               },
-  };
-  u32 fib_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  ip_interface_address_t *ia;
+  ethernet_arp_header_t *h;
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  ip4_address_t *src;
+  vlib_buffer_t *b;
+  vlib_main_t *vm;
+  u32 bi = 0;
 
-  fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
+  vm = vlib_get_main ();
 
-  if (et & ARP_ETHER_TYPE_IP4)
+  si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
     {
-      if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_IP4])
-       {
-         e->adj_index[FIB_LINK_IP4] =
-           adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
-                                          FIB_LINK_IP4,
-                                          &pfx.fp_addr,
-                                          e->sw_if_index,
-                                          e->ethernet_address);
-         ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]);
-
-         fib_table_entry_update_one_path (fib_index,
-                                          &pfx,
-                                          FIB_SOURCE_ADJ,
-                                          FIB_ENTRY_FLAG_ATTACHED,
-                                          FIB_PROTOCOL_IP4,
-                                          &pfx.fp_addr,
-                                          e->sw_if_index,
-                                          ~0,
-                                          1,
-                                          MPLS_LABEL_INVALID,
-                                          FIB_ROUTE_PATH_FLAG_NONE);
-       }
-      else
-       {
-         adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4],
-                                 e->ethernet_address);
-       }
+      return;
     }
-  if ((et & ARP_ETHER_TYPE_MPLS) &&
-      eai->flags & ETHERNET_ARP_INTERFACE_MPLS_ENABLE)
+
+  src =
+    ip4_interface_address_matching_destination (im,
+                                               &adj->sub_type.nbr.next_hop.
+                                               ip4,
+                                               adj->rewrite_header.
+                                               sw_if_index, &ia);
+  if (!src)
     {
-      if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_MPLS])
-       {
-         e->adj_index[FIB_LINK_MPLS] =
-           adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
-                                          FIB_LINK_MPLS,
-                                          &pfx.fp_addr,
-                                          e->sw_if_index,
-                                          e->ethernet_address);
-         ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]);
-       }
-      else
-       {
-         adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS],
-                                 e->ethernet_address);
-       }
+      return;
+    }
+
+  h =
+    vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+                                    &bi);
+  if (!h)
+    {
+      /*
+       * no buffer could be obtained for the request; there is nothing to
+       * fill in or send, and writing through h would be a NULL dereference.
+       */
+      return;
+    }
+
+  hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+  clib_memcpy (h->ip4_over_ethernet[0].ethernet,
+              hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
+
+  h->ip4_over_ethernet[0].ip4 = src[0];
+  h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
+
+  b = vlib_get_buffer (vm, bi);
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+  vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+  {
+    vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+    u32 *to_next = vlib_frame_vector_args (f);
+    to_next[0] = bi;
+    f->n_vectors = 1;
+    vlib_put_frame_to_node (vm, hi->output_node_index, f);
+  }
+}
+
+/*
+ * Give adjacency 'ai' a complete rewrite, built from the ARP entry's
+ * ethernet address for the link type that the adjacency carries.
+ */
+static void
+arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+  u8 *rewrite;
+
+  rewrite = ethernet_build_rewrite (vnet_get_main (),
+                                   e->sw_if_index,
+                                   adj_get_link_type (ai),
+                                   e->ethernet_address);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+/*
+ * Give adjacency 'ai' an incomplete rewrite: the broadcast rewrite used
+ * for ARP packets on the ARP entry's interface.
+ */
+static void
+arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+  u8 *rewrite;
+
+  rewrite =
+    ethernet_build_rewrite (vnet_get_main (),
+                           e->sw_if_index,
+                           VNET_LINK_ARP,
+                           VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_INCOMPLETE, rewrite);
+}
+
+/*
+ * Look up the ARP entry for an IPv4 address in an interface's ARP state.
+ * Returns NULL when the interface has no entry hash yet or the address is
+ * not present in it.
+ */
+static ethernet_arp_ip4_entry_t *
+arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  uword *slot;
+
+  if (NULL == eai->arp_entries)
+    return (NULL);
+
+  /* the hash maps the address to an index into the global entry pool */
+  slot = hash_get (eai->arp_entries, addr->as_u32);
+  if (!slot)
+    return (NULL);
+
+  return (pool_elt_at_index (am->ip4_entry_pool, slot[0]));
+}
+
+/*
+ * Adjacency walk callback: complete the adjacency from the ARP entry
+ * supplied as the walk context, then carry on with the walk.
+ */
+static adj_walk_rc_t
+arp_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+  arp_mk_complete (ai, (ethernet_arp_ip4_entry_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/*
+ * Adjacency walk callback: make the adjacency incomplete using the ARP
+ * entry supplied as the walk context, then carry on with the walk.
+ */
+static adj_walk_rc_t
+arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+  arp_mk_incomplete (ai, (ethernet_arp_ip4_entry_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/*
+ * Supply the rewrite for adjacency 'ai' on interface 'sw_if_index'.
+ *
+ * If an ARP entry exists for the adjacency's IPv4 next-hop, every adjacency
+ * on the interface with that next-hop is given a complete rewrite from the
+ * entry. Otherwise 'ai' is given the broadcast ARP rewrite, flagged as
+ * incomplete, and one ARP request is sent for the next-hop.
+ */
+void
+arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *arp_int;
+  ethernet_arp_ip4_entry_t *e;
+  ip_adjacency_t *adj;
+
+  adj = adj_get (ai);
+
+  /* make sure the per-interface ARP state exists before indexing it */
+  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+  arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+  e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
+
+  if (NULL != e)
+    {
+      adj_nbr_walk_nh4 (sw_if_index,
+                       &e->ip4_address, arp_mk_complete_walk, e);
+    }
+  else
+    {
+      /*
+       * no matching ARP entry.
+       * construct the rewrite required for an ARP packet, and stick
+       * that in the adj's pipe to smoke.
+       */
+      adj_nbr_update_rewrite (ai,
+                             ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+                             ethernet_build_rewrite (vnm,
+                                                     sw_if_index,
+                                                     VNET_LINK_ARP,
+                                                     VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+      /*
+       * since the FIB has added this adj for a route, it makes sense it may
+       * want to forward traffic sometime soon. Let's send a speculative ARP.
+       * just one. If we were to do periodically that wouldn't be bad either,
+       * but that's more code than i'm prepared to write at this time for
+       * relatively little reward.
+       */
+      arp_nbr_probe (adj);
     }
 }
 
@@ -417,7 +500,6 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
   uword *p;
   pending_resolution_t *pr, *mc;
   ethernet_arp_interface_t *arp_int;
-  fib_link_t link;
   int is_static = args->is_static;
   u32 sw_if_index = args->sw_if_index;
 
@@ -441,23 +523,43 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (make_new_arp_cache_entry)
     {
+      fib_prefix_t pfx = {
+       .fp_len = 32,
+       .fp_proto = FIB_PROTOCOL_IP4,
+       .fp_addr = {
+                   .ip4 = a->ip4,
+                   }
+       ,
+      };
+      u32 fib_index;
+
       pool_get (am->ip4_entry_pool, e);
 
       if (NULL == arp_int->arp_entries)
        {
          arp_int->arp_entries = hash_create (0, sizeof (u32));
-         if (mpls_sw_interface_is_enabled (sw_if_index))
-           arp_int->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
        }
 
       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
 
       e->sw_if_index = sw_if_index;
       e->ip4_address = a->ip4;
-      FOR_EACH_FIB_LINK (link)
-      {
-       e->adj_index[link] = ADJ_INDEX_INVALID;
-      }
+      clib_memcpy (e->ethernet_address,
+                  a->ethernet, sizeof (e->ethernet_address));
+
+      fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
+      e->fib_entry_index =
+       fib_table_entry_update_one_path (fib_index,
+                                        &pfx,
+                                        FIB_SOURCE_ADJ,
+                                        FIB_ENTRY_FLAG_ATTACHED,
+                                        FIB_PROTOCOL_IP4,
+                                        &pfx.fp_addr,
+                                        e->sw_if_index,
+                                        ~0,
+                                        1,
+                                        MPLS_LABEL_INVALID,
+                                        FIB_ROUTE_PATH_FLAG_NONE);
     }
   else
     {
@@ -468,18 +570,19 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
       if (0 == memcmp (e->ethernet_address,
                       a->ethernet, sizeof (e->ethernet_address)))
        return -1;
+
+      /* Update time stamp and ethernet address. */
+      clib_memcpy (e->ethernet_address, a->ethernet,
+                  sizeof (e->ethernet_address));
     }
 
-  /* Update time stamp and ethernet address. */
-  clib_memcpy (e->ethernet_address, a->ethernet,
-              sizeof (e->ethernet_address));
   e->cpu_time_last_updated = clib_cpu_time_now ();
   if (is_static)
     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
   else
     e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
 
-  arp_mk_complete (arp_int, e, ARP_ETHER_TYPE_BOTH);
+  adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
 
   /* Customer(s) waiting for this address to be resolved? */
   p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
@@ -1334,7 +1437,6 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
 
   args.sw_if_index = sw_if_index;
   args.flags = ETHERNET_ARP_ARGS_REMOVE;
-  args.ether_type = ARP_ETHER_TYPE_IP4;
   clib_memcpy (&args.a, a, sizeof (*a));
 
   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1350,15 +1452,13 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
  */
 static int
 vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
-                                 u32 sw_if_index,
-                                 arp_ether_type_t et, void *a_arg)
+                                 u32 sw_if_index, void *a_arg)
 {
   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
 
   args.sw_if_index = sw_if_index;
   args.flags = ETHERNET_ARP_ARGS_FLUSH;
-  args.ether_type = et;
   clib_memcpy (&args.a, a, sizeof (*a));
 
   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1372,19 +1472,16 @@ vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
  * For static entries this will re-source the adjacencies.
  *
  * @param sw_if_index The interface on which the ARP entires are acted
- * @param et The ether type of those ARP entries.
  */
 static int
 vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
-                                    u32 sw_if_index,
-                                    arp_ether_type_t et, void *a_arg)
+                                    u32 sw_if_index, void *a_arg)
 {
   ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
   vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
 
   args.sw_if_index = sw_if_index;
   args.flags = ETHERNET_ARP_ARGS_POPULATE;
-  args.ether_type = et;
   clib_memcpy (&args.a, a, sizeof (*a));
 
   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1423,22 +1520,18 @@ arp_add_del_interface_address (ip4_main_t * im,
 
       eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
 
-      hash_foreach_pair (pair, eai->arp_entries, (
-                                                  {
-                                                  e =
-                                                  pool_elt_at_index
-                                                  (am->ip4_entry_pool,
-                                                   pair->value[0]);
-                                                  if
-                                                  (ip4_destination_matches_route
-                                                   (im, &e->ip4_address,
-                                                    address, address_length))
-                                                  {
-                                                  vec_add1 (to_delete,
-                                                            e -
-                                                            am->ip4_entry_pool);}
-                                                  }
-                        ));
+      /* *INDENT-OFF* */
+      hash_foreach_pair (pair, eai->arp_entries,
+      ({
+       e = pool_elt_at_index(am->ip4_entry_pool,
+                             pair->value[0]);
+       if (ip4_destination_matches_route (im, &e->ip4_address,
+                                          address, address_length))
+         {
+           vec_add1 (to_delete, e - am->ip4_entry_pool);
+         }
+      }));
+      /* *INDENT-ON* */
 
       for (i = 0; i < vec_len (to_delete); i++)
        {
@@ -1449,62 +1542,13 @@ arp_add_del_interface_address (ip4_main_t * im,
          delme.ip4.as_u32 = e->ip4_address.as_u32;
 
          vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
-                                           e->sw_if_index,
-                                           ARP_ETHER_TYPE_BOTH, &delme);
+                                           e->sw_if_index, &delme);
        }
 
       vec_free (to_delete);
     }
 }
 
-static void
-ethernet_arp_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable)
-{
-  ethernet_arp_main_t *am = &ethernet_arp_main;
-  ethernet_arp_ip4_entry_t *e;
-  ethernet_arp_interface_t *eai;
-  u32 i, *to_update = 0;
-  hash_pair_t *pair;
-
-  if (vec_len (am->ethernet_arp_by_sw_if_index) < sw_if_index)
-    return;
-
-  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
-
-  if (is_enable)
-    eai->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
-  else
-    eai->flags &= ~ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
-
-  hash_foreach_pair (pair, eai->arp_entries, (
-                                              {
-                                              vec_add1 (to_update,
-                                                        pair->value[0]);
-                                              }
-                    ));
-
-  for (i = 0; i < vec_len (to_update); i++)
-    {
-      ethernet_arp_ip4_over_ethernet_address_t updateme;
-      e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
-
-      clib_memcpy (&updateme.ethernet, e->ethernet_address, 6);
-      updateme.ip4.as_u32 = e->ip4_address.as_u32;
-
-      if (is_enable)
-       {
-         vnet_arp_populate_ip4_over_ethernet (vnet_get_main (),
-                                              e->sw_if_index,
-                                              ARP_ETHER_TYPE_MPLS,
-                                              &updateme);
-       }
-      else
-       continue;
-
-    }
-  vec_free (to_update);
-}
-
 static clib_error_t *
 ethernet_arp_init (vlib_main_t * vm)
 {
@@ -1550,92 +1594,21 @@ ethernet_arp_init (vlib_main_t * vm)
   cb.function_opaque = 0;
   vec_add1 (im->add_del_interface_address_callbacks, cb);
 
-  vec_add1 (mpls_main.mpls_interface_state_change_callbacks,
-           ethernet_arp_sw_interface_mpls_state_change);
-
   return 0;
 }
 
 VLIB_INIT_FUNCTION (ethernet_arp_init);
 
-static void
-arp_mk_incomplete (ethernet_arp_interface_t * eai,
-                  ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
-{
-  fib_prefix_t pfx = {
-    .fp_len = 32,
-    .fp_proto = FIB_PROTOCOL_IP4,
-    .fp_addr = {
-               .ip4 = e->ip4_address,
-               },
-  };
-  u32 fib_index;
-
-  fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
-
-  if ((ARP_ETHER_TYPE_IP4 & et) &&
-      (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]))
-    {
-      /*
-       * revert the adj this ARP entry sourced to incomplete
-       */
-      adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], NULL);
-
-      /*
-       * remove the FIB erntry the ARP entry sourced
-       */
-      fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_ADJ);
-
-      /*
-       * Unlock the adj now that the ARP entry is no longer a source
-       */
-      adj_unlock (e->adj_index[FIB_LINK_IP4]);
-      e->adj_index[FIB_LINK_IP4] = ADJ_INDEX_INVALID;
-    }
-  if ((ARP_ETHER_TYPE_MPLS & et) &&
-      (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]))
-    {
-      /*
-       * revert the adj this ARP entry sourced to incomplete
-       */
-      adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], NULL);
-
-      /*
-       * Unlock the adj now that the ARP entry is no longer a source
-       */
-      adj_unlock (e->adj_index[FIB_LINK_MPLS]);
-      e->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
-    }
-}
-
 static void
 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
 {
   ethernet_arp_main_t *am = &ethernet_arp_main;
 
+  fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
   pool_put (am->ip4_entry_pool, e);
 }
 
-static ethernet_arp_ip4_entry_t *
-arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
-{
-  ethernet_arp_main_t *am = &ethernet_arp_main;
-  ethernet_arp_ip4_entry_t *e = NULL;
-  uword *p;
-
-  if (NULL != eai->arp_entries)
-    {
-      p = hash_get (eai->arp_entries, addr->as_u32);
-      if (!p)
-       return (NULL);
-
-      e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
-    }
-
-  return (e);
-}
-
 static inline int
 vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                           vnet_arp_set_ip4_over_ethernet_rpc_args_t
@@ -1651,7 +1624,8 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (NULL != e)
     {
-      arp_mk_incomplete (eai, e, ARP_ETHER_TYPE_BOTH);
+      adj_nbr_walk_nh4 (e->sw_if_index,
+                       &e->ip4_address, arp_mk_incomplete_walk, e);
       arp_entry_free (eai, e);
     }
 
@@ -1673,7 +1647,8 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (NULL != e)
     {
-      arp_mk_incomplete (eai, e, args->ether_type);
+      adj_nbr_walk_nh4 (e->sw_if_index,
+                       &e->ip4_address, arp_mk_incomplete_walk, e);
 
       /*
        * The difference between flush and unset, is that an unset
@@ -1682,8 +1657,7 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
        * does in response to interface events. unset is only done
        * by the control plane.
        */
-      if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) &&
-         (args->ether_type & ARP_ETHER_TYPE_IP4))
+      if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
        {
          arp_entry_free (eai, e);
        }
@@ -1706,7 +1680,8 @@ vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (NULL != e)
     {
-      arp_mk_complete (eai, e, args->ether_type);
+      adj_nbr_walk_nh4 (e->sw_if_index,
+                       &e->ip4_address, arp_mk_complete_walk, e);
     }
   return (0);
 }
@@ -1743,9 +1718,8 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
   pool_foreach (e, am->ip4_entry_pool,
   ({
     if (e->sw_if_index == sw_if_index)
-      {
-       vec_add1 (to_delete, e - am->ip4_entry_pool);
-      }
+      vec_add1 (to_delete,
+               e - am->ip4_entry_pool);
   }));
   /* *INDENT-ON* */
 
@@ -1759,25 +1733,21 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
 
       if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
        {
-         vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index,
-                                              ARP_ETHER_TYPE_BOTH, &delme);
+         vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
        }
       else
        {
-         vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index,
-                                           ARP_ETHER_TYPE_BOTH, &delme);
+         vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
        }
 
     }
   vec_free (to_delete);
 
-
   return 0;
 }
 
 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
 
-
 static void
 increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
 {
@@ -1811,7 +1781,6 @@ vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
   args.sw_if_index = sw_if_index;
   args.is_static = is_static;
   args.flags = 0;
-  args.ether_type = ARP_ETHER_TYPE_IP4;
   clib_memcpy (&args.a, a, sizeof (*a));
 
   vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1990,7 +1959,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
   return 0;
 }
 
-
+/* *INDENT-OFF* */
 /*?
  * Add or delete IPv4 ARP cache entries.
  *
@@ -2019,19 +1988,18 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
  * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
  * @endparblock
  ?*/
-/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
   .path = "set ip arp",
   .short_help =
-    "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+  "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
   .function = ip_arp_add_del_command_fn,
 };
 /* *INDENT-ON* */
 
 static clib_error_t *
 set_int_proxy_arp_command_fn (vlib_main_t * vm,
-                             unformat_input_t * input,
-                             vlib_cli_command_t * cmd)
+                             unformat_input_t *
+                             input, vlib_cli_command_t * cmd)
 {
   vnet_main_t *vnm = vnet_get_main ();
   u32 sw_if_index;
@@ -2066,7 +2034,7 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
   return 0;
 }
 
-
+/* *INDENT-OFF* */
 /*?
  * Enable proxy-arp on an interface. The vpp stack will answer ARP
  * requests for the indicated address range. Multiple proxy-arp
@@ -2086,11 +2054,10 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
  * To disable proxy arp on an individual interface:
  * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
  ?*/
-/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
   .path = "set interface proxy-arp",
   .short_help =
-    "set interface proxy-arp <intfc> [enable|disable]",
+  "set interface proxy-arp <intfc> [enable|disable]",
   .function = set_int_proxy_arp_command_fn,
 };
 /* *INDENT-ON* */
@@ -2174,8 +2141,8 @@ arp_term_l2bd (vlib_main_t * vm,
          error0 = ETHERNET_ARP_ERROR_replies_sent;
          error0 =
            (arp0->l2_type !=
-            clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
-            ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+            clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
+            ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
          error0 =
            (arp0->l3_type !=
             clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
@@ -2269,8 +2236,9 @@ arp_term_l2bd (vlib_main_t * vm,
             for ARP requests from other hosts. If output to VXLAN tunnel is
             required, however, can just clear the SHG in packet as follows:
             vnet_buffer(p0)->l2.shg = 0;         */
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, pi0, next0);
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next, pi0,
+                                          next0);
          continue;
 
        check_ip6_nd:
@@ -2283,9 +2251,9 @@ arp_term_l2bd (vlib_main_t * vm,
                             (&iph0->src_address)))
            {
              sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
-             if (vnet_ip6_nd_term (vm, node, p0, eth0, iph0, sw_if_index0,
-                                   vnet_buffer (p0)->l2.bd_index,
-                                   vnet_buffer (p0)->l2.shg))
+             if (vnet_ip6_nd_term
+                 (vm, node, p0, eth0, iph0, sw_if_index0,
+                  vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
                goto output_response;
            }
 
@@ -2294,10 +2262,12 @@ arp_term_l2bd (vlib_main_t * vm,
            u32 feature_bitmap0 =
              vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
            vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
-           next0 = feat_bitmap_get_next_node_index (arp_term_next_node_index,
-                                                    feature_bitmap0);
-           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                            n_left_to_next, pi0, next0);
+           next0 =
+             feat_bitmap_get_next_node_index (arp_term_next_node_index,
+                                              feature_bitmap0);
+           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                            to_next, n_left_to_next,
+                                            pi0, next0);
            continue;
          }
 
@@ -2311,8 +2281,9 @@ arp_term_l2bd (vlib_main_t * vm,
          next0 = ARP_TERM_NEXT_DROP;
          p0->error = node->errors[error0];
 
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, pi0, next0);
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next, pi0,
+                                          next0);
        }
 
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -2342,7 +2313,8 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
 
 clib_error_t *
 arp_term_init (vlib_main_t * vm)
-{                              // Initialize the feature next-node indexes
+{
+  // Initialize the feature next-node indexes
   feat_bitmap_init_next_nodes (vm,
                               arp_term_l2bd_node.index,
                               L2INPUT_N_FEAT,
@@ -2358,21 +2330,8 @@ change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
 {
   if (e->sw_if_index == sw_if_index)
     {
-
-      if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4])
-       {
-         // the update rewrite function takes the dst mac (which is not changing)
-         // the new source mac will be retrieved from the interface
-         // when the full rewrite is constructed.
-         adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4],
-                                 e->ethernet_address);
-       }
-      if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS])
-       {
-         adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS],
-                                 e->ethernet_address);
-       }
-
+      adj_nbr_walk_nh4 (e->sw_if_index,
+                       &e->ip4_address, arp_mk_complete_walk, e);
     }
 }
 
@@ -2384,9 +2343,9 @@ ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index)
 
   /* *INDENT-OFF* */
   pool_foreach (e, am->ip4_entry_pool,
-    ({
-      change_arp_mac (sw_if_index, e);
-    }));
+  ({
+    change_arp_mac (sw_if_index, e);
+  }));
   /* *INDENT-ON* */
 }
 
index 973ed58..34ddb82 100644 (file)
@@ -562,6 +562,13 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
 
 void ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index);
 
+void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+
+void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+u8 *ethernet_build_rewrite (vnet_main_t * vnm,
+                           u32 sw_if_index,
+                           vnet_link_t link_type, const void *dst_address);
+
 extern vlib_node_registration_t ethernet_input_node;
 
 #endif /* included_ethernet_h */
index 43f1cd4..45d215d 100644 (file)
@@ -42,9 +42,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/l2/l2_input.h>
-#include <vnet/srp/srp.h>
-#include <vnet/lisp-gpe/lisp_gpe.h>
-#include <vnet/devices/af_packet/af_packet.h>
+#include <vnet/adj/adj.h>
 
 /**
  * @file
  * This file contains code to manage loopback interfaces.
  */
 
-int
-vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
-{
-  // FIXME - use flags on the HW itf
-  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-  return (!(hw->hw_class_index == ethernet_hw_interface_class.index ||
-           hw->hw_class_index == af_packet_device_class.index ||
-           hw->hw_class_index == lisp_gpe_hw_class.index ||
-           hw->hw_class_index == srp_hw_interface_class.index));
-}
-
-static uword
-ethernet_set_rewrite (vnet_main_t * vnm,
-                     u32 sw_if_index,
-                     u32 l3_type,
-                     void *dst_address,
-                     void *rewrite, uword max_rewrite_bytes)
+/**
+ * @brief build a rewrite string to use for sending packets of type 'link_type'
+ * to 'dst_address'
+ */
+u8 *
+ethernet_build_rewrite (vnet_main_t * vnm,
+                       u32 sw_if_index,
+                       vnet_link_t link_type, const void *dst_address)
 {
   vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index);
   vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
   vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
   ethernet_main_t *em = &ethernet_main;
   ethernet_interface_t *ei;
-  ethernet_header_t *h = rewrite;
+  ethernet_header_t *h;
   ethernet_type_t type;
   uword n_bytes = sizeof (h[0]);
+  u8 *rewrite = NULL;
 
   if (sub_sw != sup_sw)
     {
@@ -100,22 +90,20 @@ ethernet_set_rewrite (vnet_main_t * vnm,
        }
     }
 
-  if (n_bytes > max_rewrite_bytes)
-    return 0;
-
-  switch (l3_type)
+  switch (link_type)
     {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
       _(IP4, IP4);
       _(IP6, IP6);
-      _(MPLS_UNICAST, MPLS_UNICAST);
-      _(MPLS_MULTICAST, MPLS_MULTICAST);
+      _(MPLS, MPLS_UNICAST);
       _(ARP, ARP);
 #undef _
     default:
-      return 0;
+      return NULL;
     }
 
+  vec_validate (rewrite, n_bytes - 1);
+  h = (ethernet_header_t *) rewrite;
   ei = pool_elt_at_index (em->interfaces, hw->hw_instance);
   clib_memcpy (h->src_address, ei->address, sizeof (h->src_address));
   if (dst_address)
@@ -156,7 +144,28 @@ ethernet_set_rewrite (vnet_main_t * vnm,
       h->type = clib_host_to_net_u16 (type);
     }
 
-  return n_bytes;
+  return (rewrite);
+}
+
+void
+ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{                              /* dispatch the update of adjacency 'ai' on its next-hop protocol */
+  ip_adjacency_t *adj;
+
+  adj = adj_get (ai);
+
+  if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto)
+    {
+      arp_update_adjacency (vnm, sw_if_index, ai);     /* IPv4 next-hop: hand off to arp_update_adjacency */
+    }
+  else if (FIB_PROTOCOL_IP6 == adj->ia_nh_proto)
+    {
+      ip6_ethernet_update_adjacency (vnm, sw_if_index, ai);    /* IPv6 next-hop: hand off to the IPv6 handler */
+    }
+  else
+    {
+      ASSERT (0);              /* no other next-hop protocol is handled here */
+    }
+}
 
 /* *INDENT-OFF* */
@@ -166,7 +175,8 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
   .format_header = format_ethernet_header_with_length,
   .unformat_hw_address = unformat_ethernet_address,
   .unformat_header = unformat_ethernet_header,
-  .set_rewrite = ethernet_set_rewrite,
+  .build_rewrite = ethernet_build_rewrite,
+  .update_adjacency = ethernet_update_adjacency,
 };
 /* *INDENT-ON* */
 
index 5429da2..404f0f4 100644 (file)
@@ -402,35 +402,21 @@ fib_entry_back_walk_notify (fib_node_t *node,
                                             fib_entry_get_index(fib_entry)));
     }
 
-    if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
-    {
-        /*
-         * ADJ updates (complete<->incomplete) do not need to propagate to
-         * recursive entries.
-         * The only reason its needed as far back as here, is that the adj
-         * and the incomplete adj are a different DPO type, so the LBs need
-         * to re-stack.
-         */
-        return (FIB_NODE_BACK_WALK_CONTINUE);
-    }
-    else
-    {
-        /*
-         * all other walk types can be reclassifed to a re-evaluate to
-         * all recursive dependents.
-         * By reclassifying we ensure that should any of these walk types meet
-         * they can be merged.
-         */
-        ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
-
-        /*
-         * propagate the backwalk further if we haven't already reached the
-         * maximum depth.
-         */
-        fib_walk_sync(FIB_NODE_TYPE_ENTRY,
-                      fib_entry_get_index(fib_entry),
-                      ctx);
-    }
+    /*
+     * all other walk types can be reclassified to a re-evaluate to
+     * all recursive dependents.
+     * By reclassifying we ensure that should any of these walk types meet
+     * they can be merged.
+     */
+    ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+    /*
+     * propagate the backwalk further if we haven't already reached the
+     * maximum depth.
+     */
+    fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+                 fib_entry_get_index(fib_entry),
+                 ctx);
 
     return (FIB_NODE_BACK_WALK_CONTINUE);
 }
index bea1721..ba42e6b 100644 (file)
@@ -757,6 +757,20 @@ fib_path_back_walk_notify (fib_node_t *node,
                fib_path_proto_to_chain_type(path->fp_nh_proto),
                &path->fp_dpo);
        }
+       if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
+       {
+           /*
+            * ADJ updates (complete<->incomplete) do not need to propagate to
+            * recursive entries.
+            * The only reason it's needed as far back as here, is that the adj
+            * and the incomplete adj are a different DPO type, so the LBs need
+            * to re-stack.
+            * If this walk was quashed in the fib_entry, then any non-fib_path
+            * children (like tunnels that collapse out the LB when they stack)
+            * would not see the update.
+            */
+           return (FIB_NODE_BACK_WALK_CONTINUE);
+       }
        break;
     case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
        /*
index 7729209..1e459cf 100644 (file)
@@ -222,6 +222,19 @@ fib_test_urpf_is_equal (fib_node_index_t fei,
     return (1);
 }
 
+static u8*
+fib_test_build_rewrite (u8 *eth_addr)
+{   /* test helper: builds the rewrite vector passed to adj_nbr_update_rewrite() from a 6-byte address */
+    u8* rewrite = NULL;
+
+    vec_validate(rewrite, 13);   /* presumably sizes the vector to 14 bytes (indices 0..13) - confirm vec_validate semantics */
+
+    memcpy(rewrite, eth_addr, 6);     /* bytes 0-5 */
+    memcpy(rewrite+6, eth_addr, 6);   /* bytes 6-11: the same address again; bytes 12-13 are not written here */
+
+    return (rewrite);
+}
+
 static void
 fib_test_v4 (void)
 {
@@ -523,6 +536,7 @@ fib_test_v4 (void)
     u8 eth_addr[] = {
        0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
     };
+
     ip46_address_t nh_12_12_12_12 = {
        .ip4.as_u32 = clib_host_to_net_u32(0x0c0c0c0c),
     };
@@ -561,7 +575,8 @@ fib_test_v4 (void)
                                    &adj->sub_type.nbr.next_hop)),
              "adj nbr next-hop ok");
 
-    adj_nbr_update_rewrite(ai_01, eth_addr);
+    adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          fib_test_build_rewrite(eth_addr));
     FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
             "adj is complete");
     FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
@@ -589,7 +604,8 @@ fib_test_v4 (void)
     FIB_TEST((0 == ip46_address_cmp(&nh_12_12_12_12,
                                    &adj->sub_type.nbr.next_hop)),
              "adj nbr next-hop ok");
-    adj_nbr_update_rewrite(ai_12_12_12_12, eth_addr);
+    adj_nbr_update_rewrite(ai_12_12_12_12, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          fib_test_build_rewrite(eth_addr));
     FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
             "adj is complete");
 
@@ -636,7 +652,8 @@ fib_test_v4 (void)
                                    &adj->sub_type.nbr.next_hop)),
              "adj nbr next-hop ok");
 
-    adj_nbr_update_rewrite(ai_02, eth_addr);
+    adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          fib_test_build_rewrite(eth_addr));
     FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
             "adj is complete");
     FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
@@ -3113,7 +3130,8 @@ fib_test_v6 (void)
                                    &adj->sub_type.nbr.next_hop)),
              "adj nbr next-hop ok");
 
-    adj_nbr_update_rewrite(ai_01, eth_addr);
+    adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          fib_test_build_rewrite(eth_addr));
     FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
             "adj is complete");
     FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
@@ -3150,7 +3168,8 @@ fib_test_v6 (void)
                                    &adj->sub_type.nbr.next_hop)),
              "adj nbr next-hop ok");
 
-    adj_nbr_update_rewrite(ai_02, eth_addr);
+    adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          fib_test_build_rewrite(eth_addr));
     FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
             "adj is complete");
     FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
index 4d65656..2f23527 100644 (file)
@@ -73,21 +73,17 @@ typedef enum fib_protocol_t_ {
  * Link Type. This maps directly into the ethertype.
  */
 typedef enum fib_link_t_ {
-#if CLIB_DEBUG > 0
-    FIB_LINK_IP4 = 1,
-#else
-    FIB_LINK_IP4 = 0,
-#endif
-    FIB_LINK_IP6,
-    FIB_LINK_ETHERNET,
-    FIB_LINK_MPLS,
+    FIB_LINK_IP4 = VNET_LINK_IP4,
+    FIB_LINK_IP6 = VNET_LINK_IP6,
+    FIB_LINK_MPLS = VNET_LINK_MPLS,
+    FIB_LINK_ETHERNET = VNET_LINK_ETHERNET,
 }  __attribute__ ((packed)) fib_link_t;
 
 /**
  * Definition outside of enum so it does not need to be included in non-defaulted
  * switch statements
  */
-#define FIB_LINK_NUM (FIB_LINK_MPLS+1)
+#define FIB_LINK_NUM (FIB_LINK_ETHERNET+1)
 
 #define FIB_LINKS {                  \
     [FIB_LINK_ETHERNET] = "ethernet", \
index aa6fca0..a4b3f9f 100644 (file)
@@ -17,7 +17,7 @@
 
 #include <vnet/vnet.h>
 #include <vnet/gre/gre.h>
-#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_midchain.h>
 
 gre_main_t gre_main;
 
@@ -162,133 +162,95 @@ unformat_gre_header (unformat_input_t * input, va_list * args)
   return 1;
 }
 
-static uword gre_set_rewrite (vnet_main_t * vnm,
-                              u32 sw_if_index,
-                              u32 l3_type,
-                              void * dst_address,
-                              void * rewrite,
-                              uword max_rewrite_bytes)
+static int
+gre_proto_from_vnet_link (vnet_link_t link)
 {
-  /*
-   * Conundrum: packets from tun/tap destined for the tunnel
-   * actually have this rewrite applied. Transit packets do not.
-   * To make the two cases equivalent, don't generate a
-   * rewrite here, build the entire header in the fast path.
-   */
-  return 0;
-
-#ifdef THINGS_WORKED_AS_ONE_MIGHT_LIKE
-  ip4_and_gre_header_t * h = rewrite;
-  gre_protocol_t protocol;
-
-  if (max_rewrite_bytes < sizeof (h[0]))
-    return 0;
-
-  switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = GRE_PROTOCOL_##b; break
-    _ (IP4, ip4);
-    _ (IP6, ip6);
-#undef _
-  default:
-    return 0;
-  }
-
-  memset (h, 0, sizeof (*h));
-  h->ip4.ip_version_and_header_length = 0x45;
-  h->ip4.ttl = 64;
-  h->ip4.protocol = IP_PROTOCOL_GRE;
-  h->gre.protocol = clib_host_to_net_u16 (protocol);
-
-  return sizeof (h[0]);
-#endif
+    switch (link)
+    {
+    case VNET_LINK_IP4:
+        return (GRE_PROTOCOL_ip4);
+    case VNET_LINK_IP6:
+        return (GRE_PROTOCOL_ip6);
+    case VNET_LINK_MPLS:
+        return (GRE_PROTOCOL_mpls_unicast);
+    case VNET_LINK_ETHERNET:
+        return (GRE_PROTOCOL_teb);
+    case VNET_LINK_ARP:
+        return (GRE_PROTOCOL_arp);
+    }
+    ASSERT(0);
+    return (GRE_PROTOCOL_ip4);
 }
 
-static uword
-gre_interface_tx (vlib_main_t * vm,
-                 vlib_node_runtime_t * node,
-                 vlib_frame_t * frame)
+static u8*
+gre_build_rewrite (vnet_main_t * vnm,
+                  u32 sw_if_index,
+                  vnet_link_t link_type,
+                  const void *dst_address)
 {
   gre_main_t * gm = &gre_main;
-  u32 next_index;
-  u32 * from, * to_next, n_left_from, n_left_to_next;
-  vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
-  gre_tunnel_t *t = pool_elt_at_index (gm->tunnels, rd->dev_instance);
-
-  /* Vector of buffer / pkt indices we're supposed to process */
-  from = vlib_frame_vector_args (frame);
-
-  /* Number of buffers / pkts */
-  n_left_from = frame->n_vectors;
-
-  /* Speculatively send the first buffer to the last disposition we used */
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      /* set up to enqueue to our disposition with index = next_index */
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+  ip4_and_gre_header_t * h;
+  u8* rewrite = NULL;
+  gre_tunnel_t *t;
+  u32 ti;
 
-      /*
-       * FIXME DUAL LOOP
-       */
+  ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0, adj_index0, next0;
-         const ip_adjacency_t * adj0;
-         const dpo_id_t *dpo0;
-         ip4_header_t * ip0;
-         vlib_buffer_t * b0;
+  if (~0 == ti)
+      /* not one of ours */
+      return (0);
 
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
+  t = pool_elt_at_index(gm->tunnels, ti);
 
-         b0 = vlib_get_buffer(vm, bi0);
-         ip0 = vlib_buffer_get_current (b0);
+  vec_validate(rewrite, sizeof(*h)-1);
+  h = (ip4_and_gre_header_t*)rewrite;
+  h->gre.protocol = clib_host_to_net_u16(gre_proto_from_vnet_link(link_type));
 
-         /* Fixup the checksum and len fields in the GRE tunnel encap
-          * that was applied at the midchain node */
-         ip0->length =
-           clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
-         ip0->checksum = ip4_header_checksum (ip0);
+  h->ip4.ip_version_and_header_length = 0x45;
+  h->ip4.ttl = 254;
+  h->ip4.protocol = IP_PROTOCOL_GRE;
+  /* fixup ip4 header length and checksum after-the-fact */
+  h->ip4.src_address.as_u32 = t->tunnel_src.as_u32;
+  h->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32;
+  h->ip4.checksum = ip4_header_checksum (&h->ip4);
 
-         /* Follow the DPO on which the midchain is stacked */
-         adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
-         adj0 = adj_get(adj_index0);
-         dpo0 = &adj0->sub_type.midchain.next_dpo;
-         next0 = dpo0->dpoi_next_node;
-         vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+  return (rewrite);
+}
 
-         if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             gre_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                  b0, sizeof (*tr));
-             tr->tunnel_id = t - gm->tunnels;
-             tr->length = ip0->length;
-             tr->src.as_u32 = ip0->src_address.as_u32;
-             tr->dst.as_u32 = ip0->dst_address.as_u32;
-           }
+void
+gre_fixup (vlib_main_t *vm,
+          ip_adjacency_t *adj,
+          vlib_buffer_t *b0)
+{
+    ip4_header_t * ip0;
 
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, next0);
-       }
+    ip0 = vlib_buffer_get_current (b0);
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
+    /* Fixup the checksum and len fields in the GRE tunnel encap
+     * that was applied at the midchain node */
+    ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+    ip0->checksum = ip4_header_checksum (ip0);
+}
 
-  vlib_node_increment_counter (vm, gre_input_node.index,
-                              GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
+void
+gre_update_adj (vnet_main_t * vnm,
+               u32 sw_if_index,
+               adj_index_t ai)
+{
+    adj_nbr_midchain_update_rewrite (ai, gre_fixup, 
+                                    ADJ_MIDCHAIN_FLAG_NONE,
+                                    gre_build_rewrite(vnm, sw_if_index,
+                                                      adj_get_link_type(ai),
+                                                      NULL));
 
-  return frame->n_vectors;
+    gre_tunnel_stack(ai);
 }
 
+/**
+ * @brief TX function. Only called for L2. L3 traffic uses the adj-midchains
+ */
 static uword
-gre_l2_interface_tx (vlib_main_t * vm,
+gre_interface_tx (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * frame)
 {
@@ -330,7 +292,7 @@ gre_l2_interface_tx (vlib_main_t * vm,
 
          b0 = vlib_get_buffer(vm, bi0);
 
-         vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->adj_index[FIB_LINK_ETHERNET];
+         vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index;
 
          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
            {
@@ -356,38 +318,6 @@ gre_l2_interface_tx (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
-static clib_error_t *
-gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
-  gre_main_t * gm = &gre_main;
-  vnet_hw_interface_t * hi;
-  gre_tunnel_t *t;
-  u32 ti;
-
-  hi = vnet_get_hw_interface (vnm, hw_if_index);
-
-  if (NULL == gm->tunnel_index_by_sw_if_index ||
-      hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
-      return (NULL);
-
-  ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
-
-  if (~0 == ti)
-      /* not one of ours */
-      return (NULL);
-
-  t = pool_elt_at_index(gm->tunnels, ti);
-
-  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
-    vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
-  else
-    vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
-
-  gre_tunnel_stack(t);
-
-  return /* no error */ 0;
-}
-
 static u8 * format_gre_tunnel_name (u8 * s, va_list * args)
 {
   u32 dev_instance = va_arg (*args, u32);
@@ -403,15 +333,6 @@ static u8 * format_gre_device (u8 * s, va_list * args)
   return s;
 }
 
-static u8 * format_gre_l2_device (u8 * s, va_list * args)
-{
-  u32 dev_instance = va_arg (*args, u32);
-  CLIB_UNUSED (int verbose) = va_arg (*args, int);
-
-  s = format (s, "GRE L2-tunnel: id %d\n", dev_instance);
-  return s;
-}
-
 VNET_DEVICE_CLASS (gre_device_class) = {
   .name = "GRE tunnel device",
   .format_device_name = format_gre_tunnel_name,
@@ -427,27 +348,13 @@ VNET_DEVICE_CLASS (gre_device_class) = {
 VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class,
                                   gre_interface_tx)
 
-VNET_DEVICE_CLASS (gre_l2_device_class) = {
-  .name = "GRE L2 tunnel device",
-  .format_device_name = format_gre_tunnel_name,
-  .format_device = format_gre_l2_device,
-  .format_tx_trace = format_gre_tx_trace,
-  .tx_function = gre_l2_interface_tx,
-  .admin_up_down_function = gre_interface_admin_up_down,
-#ifdef SOON
-  .clear counter = 0;
-#endif
-};
-
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_l2_device_class,
-                                  gre_l2_interface_tx)
-
-
 VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
   .name = "GRE",
   .format_header = format_gre_header_with_length,
   .unformat_header = unformat_gre_header,
-  .set_rewrite = gre_set_rewrite,
+  .build_rewrite = gre_build_rewrite,
+  .update_adjacency = gre_update_adj,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 static void add_protocol (gre_main_t * gm,
index d1a6f31..a0ee9ad 100644 (file)
@@ -86,14 +86,14 @@ typedef struct {
   u32 sibling_index;
 
   /**
-   * The index of the midchain adjacency created for this tunnel
+   * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
    */
-  adj_index_t adj_index[FIB_LINK_NUM];
+  u32 l2_tx_arc;
 
   /**
-   * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
+   * an L2 tunnel always requires an L2 midchain. cache here for DP.
    */
-  u32 l2_tx_arc;
+  adj_index_t l2_adj_index;
 } gre_tunnel_t;
 
 typedef struct {
@@ -142,7 +142,14 @@ gre_register_input_type (vlib_main_t * vm,
                         gre_protocol_t protocol,
                         u32 node_index);
 
-extern void gre_tunnel_stack (gre_tunnel_t *gt);
+extern  clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm,
+                                                   u32 hw_if_index,
+                                                   u32 flags);
+
+extern void gre_tunnel_stack (adj_index_t ai);
+extern void gre_update_adj (vnet_main_t * vnm,
+                           u32 sw_if_index,
+                           adj_index_t ai);
 
 format_function_t format_gre_protocol;
 format_function_t format_gre_header;
index 0550c0b..397a042 100644 (file)
@@ -21,6 +21,7 @@
 #include <vnet/ip/format.h>
 #include <vnet/fib/ip4_fib.h>
 #include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_nbr.h>
 #include <vnet/mpls/mpls.h>
 
 static inline u64
@@ -36,7 +37,6 @@ static u8 *
 format_gre_tunnel (u8 * s, va_list * args)
 {
   gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *);
-  int detail = va_arg (*args, int);
   gre_main_t * gm = &gre_main;
 
   s = format (s,
@@ -46,14 +46,6 @@ format_gre_tunnel (u8 * s, va_list * args)
               format_ip4_address, &t->tunnel_dst,
               (t->teb ? "teb" : "ip"),
               t->outer_fib_index);
-  if (detail)
-  {
-      s = format (s, "\n  fib-entry:%d adj-ip4:%d adj-ip6:%d adj-mpls:%d",
-                  t->fib_entry_index,
-                  t->adj_index[FIB_LINK_IP4],
-                  t->adj_index[FIB_LINK_IP6],
-                  t->adj_index[FIB_LINK_MPLS]);
-  }
 
   return s;
 }
@@ -113,32 +105,68 @@ gre_tunnel_from_fib_node (fib_node_t *node)
  * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
  */
 void
-gre_tunnel_stack (gre_tunnel_t *gt)
+gre_tunnel_stack (adj_index_t ai)
 {
-    fib_link_t linkt;
+    gre_main_t * gm = &gre_main;
+    ip_adjacency_t *adj;
+    gre_tunnel_t *gt;
+    u32 sw_if_index;
+
+    adj = adj_get(ai);
+    sw_if_index = adj->rewrite_header.sw_if_index;
+
+    if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) ||
+       (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+       return;
+
+    gt = pool_elt_at_index(gm->tunnels,
+                          gm->tunnel_index_by_sw_if_index[sw_if_index]);
 
     /*
      * find the adjacency that is contributed by the FIB entry
      * that this tunnel resovles via, and use it as the next adj
      * in the midchain
      */
-    FOR_EACH_FIB_LINK(linkt)
+    if (vnet_hw_interface_get_flags(vnet_get_main(),
+                                   gt->hw_if_index) &
+       VNET_HW_INTERFACE_FLAG_LINK_UP)
     {
-        if (ADJ_INDEX_INVALID != gt->adj_index[linkt])
-        {
-           if (vnet_hw_interface_get_flags(vnet_get_main(),
-                                           gt->hw_if_index) &
-               VNET_HW_INTERFACE_FLAG_LINK_UP)
-           {
-               adj_nbr_midchain_stack(
-                   gt->adj_index[linkt],
-                   fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
-           }
-           else
-           {
-               adj_nbr_midchain_unstack(gt->adj_index[linkt]);
-           }
-        }
+       adj_nbr_midchain_stack(
+           ai,
+           fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
+    }
+    else
+    {
+       adj_nbr_midchain_unstack(ai);
+    }
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a GRE interface
+ */
+static adj_walk_rc_t
+gre_adj_walk_cb (adj_index_t ai,
+                void *ctx)
+{
+    gre_tunnel_stack(ai);
+
+    return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+gre_tunnel_restack (gre_tunnel_t *gt)
+{
+    fib_protocol_t proto;
+
+    /*
+     * walk all the adjacencies on the GRE interface and restack them
+     */
+    FOR_EACH_FIB_IP_PROTOCOL(proto)
+    {
+       adj_nbr_walk(gt->sw_if_index,
+                    proto,
+                    gre_adj_walk_cb,
+                    NULL);
     }
 }
 
@@ -147,9 +175,9 @@ gre_tunnel_stack (gre_tunnel_t *gt)
  */
 static fib_node_back_walk_rc_t
 gre_tunnel_back_walk (fib_node_t *node,
-                          fib_node_back_walk_ctx_t *ctx)
+                     fib_node_back_walk_ctx_t *ctx)
 {
-    gre_tunnel_stack(gre_tunnel_from_fib_node(node));
+    gre_tunnel_restack(gre_tunnel_from_fib_node(node));
 
     return (FIB_NODE_BACK_WALK_CONTINUE);
 }
@@ -192,63 +220,6 @@ const static fib_node_vft_t gre_vft = {
     .fnv_back_walk = gre_tunnel_back_walk,
 };
 
-static int
-gre_proto_from_fib_link (fib_link_t link)
-{
-    switch (link)
-    {
-    case FIB_LINK_IP4:
-        return (GRE_PROTOCOL_ip4);
-    case FIB_LINK_IP6:
-        return (GRE_PROTOCOL_ip6);
-    case FIB_LINK_MPLS:
-        return (GRE_PROTOCOL_mpls_unicast);
-    case FIB_LINK_ETHERNET:
-        return (GRE_PROTOCOL_teb);
-    }
-    ASSERT(0);
-    return (GRE_PROTOCOL_ip4);
-}
-
-static u8 *
-gre_rewrite (gre_tunnel_t * t,
-             fib_link_t link)
-{
-  ip4_and_gre_header_t * h0;
-  u8 * rewrite_data = 0;
-
-  vec_validate_init_empty (rewrite_data, sizeof (*h0) - 1, 0);
-
-  h0 = (ip4_and_gre_header_t *) rewrite_data;
-
-  h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link));
-
-  h0->ip4.ip_version_and_header_length = 0x45;
-  h0->ip4.ttl = 254;
-  h0->ip4.protocol = IP_PROTOCOL_GRE;
-  /* $$$ fixup ip4 header length and checksum after-the-fact */
-  h0->ip4.src_address.as_u32 = t->tunnel_src.as_u32;
-  h0->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32;
-  h0->ip4.checksum = ip4_header_checksum (&h0->ip4);
-
-  return (rewrite_data);
-}
-
-static void
-gre_fixup (vlib_main_t *vm,
-          ip_adjacency_t *adj,
-          vlib_buffer_t *b0)
-{
-    ip4_header_t * ip0;
-
-    ip0 = vlib_buffer_get_current (b0);
-
-    /* Fixup the checksum and len fields in the GRE tunnel encap
-     * that was applied at the midchain node */
-    ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
-    ip0->checksum = ip4_header_checksum (ip0);
-}
-
 static int 
 vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
                      u32 * sw_if_indexp)
@@ -262,8 +233,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
   u32 outer_fib_index;
   u8 address[6];
   clib_error_t *error;
-  fib_link_t linkt;
-  u8 *rewrite;
 
   outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id);
 
@@ -278,10 +247,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
   pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
   memset (t, 0, sizeof (*t));
   fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
-  FOR_EACH_FIB_LINK(linkt)
-  {
-      t->adj_index[linkt] = ADJ_INDEX_INVALID;
-  }
 
   if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) {
       vnet_interface_main_t * im = &vnm->interface_main;
@@ -321,10 +286,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
         address[3] = 0xd0;
         address[4] = t - gm->tunnels;
 
-        error = ethernet_register_interface
-          (vnm,
-           gre_l2_device_class.index, t - gm->tunnels, address, &hw_if_index,
-           0);
+        error = ethernet_register_interface(vnm,
+                                           gre_device_class.index,
+                                           t - gm->tunnels, address,
+                                           &hw_if_index,
+                                           0);
 
         if (error)
         {
@@ -337,10 +303,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
                                                hi->tx_node_index,
                                                "adj-l2-midchain");
       } else {
-       hw_if_index = vnet_register_interface
-           (vnm, gre_device_class.index, t - gm->tunnels,
-            gre_hw_interface_class.index,
-            t - gm->tunnels);
+       hw_if_index = vnet_register_interface(vnm,
+                                             gre_device_class.index,
+                                             t - gm->tunnels,
+                                             gre_hw_interface_class.index,
+                                             t - gm->tunnels);
       }
       hi = vnet_get_hw_interface (vnm, hw_if_index);
       sw_if_index = hi->sw_if_index;
@@ -395,48 +362,18 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
                           FIB_NODE_TYPE_GRE_TUNNEL,
                           t - gm->tunnels);
 
-  /*
-   * create and update the midchain adj this tunnel sources.
-   * We could be smarter here and trigger this on an interface proto enable,
-   * like we do for MPLS.
-   */
+  clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+  clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+
   if (t->teb)
   {
-      t->adj_index[FIB_LINK_ETHERNET] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
-                                                           FIB_LINK_ETHERNET,
-                                                           &zero_addr,
-                                                           sw_if_index);
-
-      rewrite = gre_rewrite(t, FIB_LINK_ETHERNET);
-      adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_ETHERNET],
-                                     gre_fixup,
-                                     ADJ_MIDCHAIN_FLAG_NO_COUNT,
-                                     rewrite);
-      vec_free(rewrite);
-  }
-  else
-  {
-      FOR_EACH_FIB_IP_LINK (linkt)
-      {
-         t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
-                                                   linkt,
-                                                   &zero_addr,
-                                                   sw_if_index);
-
-         rewrite = gre_rewrite(t, linkt);
-         adj_nbr_midchain_update_rewrite(t->adj_index[linkt],
-                                         gre_fixup,
-                                         ADJ_MIDCHAIN_FLAG_NONE,
-                                         rewrite);
-         vec_free(rewrite);
-      }
-  }
-
-  t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
+      t->l2_adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                           FIB_LINK_ETHERNET,
+                                           &zero_addr,
+                                           sw_if_index);
 
-  clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
-  clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
-  gre_tunnel_stack(t);
+      gre_update_adj(vnm, t->sw_if_index, t->l2_adj_index);
+  }
 
   if (sw_if_indexp)
     *sw_if_indexp = sw_if_index;
@@ -451,7 +388,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
   gre_main_t * gm = &gre_main;
   vnet_main_t * vnm = gm->vnet_main;
   gre_tunnel_t * t;
-  fib_link_t linkt;
   u32 sw_if_index;
 
   t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id);
@@ -472,11 +408,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
   fib_table_entry_delete_index(t->fib_entry_index,
                                FIB_SOURCE_RR);
 
-  FOR_EACH_FIB_LINK(linkt)
-  {
-      adj_unlock(t->adj_index[linkt]);
-  }
-
   gre_tunnel_db_remove(t);
   fib_node_deinit(&t->node);
   pool_put (gm->tunnels, t);
@@ -497,43 +428,36 @@ vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a,
     return (vnet_gre_tunnel_delete(a, sw_if_indexp));
 }
 
-static void
-gre_sw_interface_mpls_state_change (u32 sw_if_index,
-                                    u32 is_enable)
+clib_error_t *
+gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
 {
-  gre_main_t *gm = &gre_main;
+  gre_main_t * gm = &gre_main;
+  vnet_hw_interface_t * hi;
   gre_tunnel_t *t;
-  u8 *rewrite;
+  u32 ti;
 
-  if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) ||
-      (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
-      return;
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
 
-  t = pool_elt_at_index(gm->tunnels,
-                        gm->tunnel_index_by_sw_if_index[sw_if_index]);
+  if (NULL == gm->tunnel_index_by_sw_if_index ||
+      hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
+      return (NULL);
 
-  if (is_enable)
-    {
-      t->adj_index[FIB_LINK_MPLS] =
-          adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
-                              FIB_LINK_MPLS,
-                              &zero_addr,
-                              sw_if_index);
-
-      rewrite = gre_rewrite(t, FIB_LINK_MPLS);
-      adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS],
-                                     gre_fixup,
-                                     ADJ_MIDCHAIN_FLAG_NONE,
-                                      rewrite);
-      vec_free(rewrite);
-    }
+  ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
+
+  if (~0 == ti)
+      /* not one of ours */
+      return (NULL);
+
+  t = pool_elt_at_index(gm->tunnels, ti);
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
   else
-    {
-      adj_unlock(t->adj_index[FIB_LINK_MPLS]);
-      t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
-    }
+    vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
 
-  gre_tunnel_stack(t);
+  gre_tunnel_restack(t);
+
+  return /* no error */ 0;
 }
 
 static clib_error_t *
@@ -637,14 +561,14 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
     {
       pool_foreach (t, gm->tunnels,
       ({
-          vlib_cli_output (vm, "%U", format_gre_tunnel, t, 0);
+          vlib_cli_output (vm, "%U", format_gre_tunnel, t);
       }));
     }
   else
   {
       t = pool_elt_at_index(gm->tunnels, ti);
 
-      vlib_cli_output (vm, "%U", format_gre_tunnel, t, 1);
+      vlib_cli_output (vm, "%U", format_gre_tunnel, t);
   }
 
   return 0;
@@ -658,9 +582,6 @@ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
 /* force inclusion from application's main.c */
 clib_error_t *gre_interface_init (vlib_main_t *vm)
 {
-  vec_add1(mpls_main.mpls_interface_state_change_callbacks,
-           gre_sw_interface_mpls_state_change);
-
   fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft);
 
   return 0;
index 9997ddc..174085a 100644 (file)
@@ -167,42 +167,41 @@ unformat_hdlc_header (unformat_input_t * input, va_list * args)
   return 1;
 }
 
-static uword hdlc_set_rewrite (vnet_main_t * vnm,
-                              u32 sw_if_index,
-                              u32 l3_type,
-                              void * dst_address,
-                              void * rewrite,
-                              uword max_rewrite_bytes)
+static u8*
+hdlc_build_rewrite (vnet_main_t * vnm,
+                   u32 sw_if_index,
+                   vnet_link_t link_type,
+                   const void *dst_address)
 {
-  hdlc_header_t * h = rewrite;
+  hdlc_header_t * h;
+  u8* rewrite = NULL;
   hdlc_protocol_t protocol;
 
-  if (max_rewrite_bytes < sizeof (h[0]))
-    return 0;
-
-  switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = HDLC_PROTOCOL_##b; break
+  switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: protocol = HDLC_PROTOCOL_##b; break
     _ (IP4, ip4);
     _ (IP6, ip6);
-    _ (MPLS_UNICAST, mpls_unicast);
-    _ (MPLS_MULTICAST, mpls_multicast);
+    _ (MPLS, mpls_unicast);
 #undef _
   default:
-    return 0;
+      return (NULL);
   }
 
+  vec_validate(rewrite, sizeof(*h)-1);
+  h = (hdlc_header_t *)rewrite;
   h->address = 0x0f;
   h->control = 0x00;
   h->protocol = clib_host_to_net_u16 (protocol);
                     
-  return sizeof (h[0]);
+  return (rewrite);
 }
 
 VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
   .name = "HDLC",
   .format_header = format_hdlc_header_with_length,
   .unformat_header = unformat_hdlc_header,
-  .set_rewrite = hdlc_set_rewrite,
+  .build_rewrite = hdlc_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 static void add_protocol (hdlc_main_t * pm,
index 08db683..941ab17 100644 (file)
@@ -40,6 +40,7 @@
 #include <vnet/vnet.h>
 #include <vnet/plugin/plugin.h>
 #include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
 
 #define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0)
 #define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1)
@@ -1044,6 +1045,16 @@ vnet_hw_interface_compare (vnet_main_t * vnm,
   return (word) h0->hw_instance - (word) h1->hw_instance;
 }
 
+int
+vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
+{
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  vnet_hw_interface_class_t *hc =
+    vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+  return (hc->flags & VNET_HW_INTERFACE_CLASS_FLAG_P2P);
+}
+
 clib_error_t *
 vnet_interface_init (vlib_main_t * vm)
 {
@@ -1120,6 +1131,12 @@ vnet_interface_init (vlib_main_t * vm)
       {
        c->index = vec_len (im->hw_interface_classes);
        hash_set_mem (im->hw_interface_class_by_name, c->name, c->index);
+
+       if (NULL == c->build_rewrite)
+         c->build_rewrite = default_build_rewrite;
+       if (NULL == c->update_adjacency)
+         c->update_adjacency = default_update_adjacency;
+
        vec_add1 (im->hw_interface_classes, c[0]);
        c = c->next_class_registration;
       }
@@ -1287,6 +1304,48 @@ vnet_hw_interface_change_mac_address (vnet_main_t * vnm, u32 hw_if_index,
     (vnm, hw_if_index, mac_address);
 }
 
+vnet_l3_packet_type_t
+vnet_link_to_l3_proto (vnet_link_t link)
+{
+  switch (link)
+    {
+    case VNET_LINK_IP4:
+      return (VNET_L3_PACKET_TYPE_IP4);
+    case VNET_LINK_IP6:
+      return (VNET_L3_PACKET_TYPE_IP6);
+    case VNET_LINK_MPLS:
+      return (VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+    case VNET_LINK_ARP:
+      return (VNET_L3_PACKET_TYPE_ARP);
+    case VNET_LINK_ETHERNET:
+      ASSERT (0);
+      break;
+    }
+  ASSERT (0);
+  return (0);
+}
+
+u8 *
+default_build_rewrite (vnet_main_t * vnm,
+                      u32 sw_if_index,
+                      vnet_link_t link_type, const void *dst_address)
+{
+  return (NULL);
+}
+
+void
+default_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  u8 *rewrite;
+
+  rewrite = vnet_build_rewrite_for_sw_interface (vnm, sw_if_index,
+                                                adj_get_link_type (ai),
+                                                NULL);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 245b86f..c5a7947 100644 (file)
 #define included_vnet_interface_h
 
 #include <vnet/unix/pcap.h>
+#include <vnet/l3_types.h>
 
 struct vnet_main_t;
 struct vnet_hw_interface_t;
 struct vnet_sw_interface_t;
+struct ip46_address_t;
 
 /* Interface up/down callback. */
 typedef clib_error_t *(vnet_interface_function_t)
@@ -196,6 +198,39 @@ __VA_ARGS__ vnet_device_class_t x
   { dev.tx_function = fn ## _multiarch_select(); }
 #endif
 
+/**
+ * Link Type: A description of the protocol of packets on the link.
+ * On an ethernet link this maps directly into the ethertype. On a GRE tunnel
+ * it maps to the GRE-proto, etc for other link types.
+ */
+typedef enum vnet_link_t_
+{
+#if CLIB_DEBUG > 0
+  VNET_LINK_IP4 = 1,
+#else
+  VNET_LINK_IP4 = 0,
+#endif
+  VNET_LINK_IP6,
+  VNET_LINK_MPLS,
+  VNET_LINK_ETHERNET,
+  VNET_LINK_ARP,
+} __attribute__ ((packed)) vnet_link_t;
+
+/**
+ * @brief Convert a link type to an Ethertype
+ */
+extern vnet_l3_packet_type_t vnet_link_to_l3_proto (vnet_link_t link);
+
+/**
+ * @brief Attributes assignable to a HW interface Class.
+ */
+typedef enum vnet_hw_interface_class_flags_t_
+{
+  /**
+   * @brief a point 2 point interface
+   */
+  VNET_HW_INTERFACE_CLASS_FLAG_P2P = (1 << 0),
+} vnet_hw_interface_class_flags_t;
 
 /* Layer-2 (e.g. Ethernet) interface class. */
 typedef struct _vnet_hw_interface_class
@@ -206,6 +241,9 @@ typedef struct _vnet_hw_interface_class
   /* Class name (e.g. "Ethernet"). */
   char *name;
 
+  /* Flags */
+  vnet_hw_interface_class_flags_t flags;
+
   /* Function to call when hardware interface is added/deleted. */
   vnet_interface_function_t *interface_add_del_function;
 
@@ -233,13 +271,16 @@ typedef struct _vnet_hw_interface_class
   /* Parser for packet header for e.g. rewrite string. */
   unformat_function_t *unformat_header;
 
-  /* Forms adjacency for given l3 packet type and destination address.
-     Returns number of bytes in adjacency. */
-    uword (*set_rewrite) (struct vnet_main_t * vnm,
-                         u32 sw_if_index,
-                         u32 l3_packet_type,
-                         void *dst_address,
-                         void *rewrite, uword max_rewrite_bytes);
+  /* Builds a rewrite string for the interface to the destination
+   * for the payload/link type. */
+  u8 *(*build_rewrite) (struct vnet_main_t * vnm,
+                       u32 sw_if_index,
+                       vnet_link_t link_type, const void *dst_hw_address);
+
+  /* Update an adjacency added by FIB (as opposed to via the
+   * neighbour resolution protocol). */
+  void (*update_adjacency) (struct vnet_main_t * vnm,
+                           u32 sw_if_index, u32 adj_index);
 
     uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
                                           u32 hw_if_index,
@@ -255,6 +296,20 @@ typedef struct _vnet_hw_interface_class
 
 } vnet_hw_interface_class_t;
 
+/**
+ * @brief Return a complete, zero-length (aka dummy) rewrite
+ */
+extern u8 *default_build_rewrite (struct vnet_main_t *vnm,
+                                 u32 sw_if_index,
+                                 vnet_link_t link_type,
+                                 const void *dst_hw_address);
+
+/**
+ * @brief Default adjacency update function
+ */
+extern void default_update_adjacency (struct vnet_main_t *vnm,
+                                     u32 sw_if_index, u32 adj_index);
+
 #define VNET_HW_INTERFACE_CLASS(x,...)                                  \
   __VA_ARGS__ vnet_hw_interface_class_t x;                              \
 static void __vnet_add_hw_interface_class_registration_##x (void)       \
index f603a03..424e176 100644 (file)
@@ -128,6 +128,22 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index)
   return hw->flags;
 }
 
+always_inline uword
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
+                          vlib_rx_or_tx_t dir)
+{
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  return hw->max_l3_packet_bytes[dir];
+}
+
+always_inline uword
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
+                          vlib_rx_or_tx_t dir)
+{
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  return (hw->max_l3_packet_bytes[dir]);
+}
+
 always_inline uword
 vnet_hw_interface_is_link_up (vnet_main_t * vnm, u32 hw_if_index)
 {
index 0d0eb6c..8a76a61 100644 (file)
@@ -51,6 +51,7 @@ unformat_function_t unformat_tcp_udp_port;
 typedef enum format_ip_adjacency_flags_t_
 {
     FORMAT_IP_ADJACENCY_NONE,
+    FORMAT_IP_ADJACENCY_BRIEF  = FORMAT_IP_ADJACENCY_NONE,
     FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
 } format_ip_adjacency_flags_t;
 
index 2197345..b0390b8 100644 (file)
@@ -1118,7 +1118,7 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
 
   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
               t->fib_index, t->dpo_index, format_ip_adjacency,
-              vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
+              t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
              t->flow_hash);
   s = format (s, "\n%U%U",
               format_white_space, indent,
@@ -1890,6 +1890,13 @@ ip4_arp_inline (vlib_main_t * vm,
 
          p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
 
+         /*
+          * the adj has been updated to a rewrite but the DPO that got
+          * us here hasn't - yet. no big deal. we'll drop while we wait.
+          */
+         if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+           continue;
+
          if (drop0)
            continue;
 
index ab0e650..7854612 100644 (file)
@@ -375,6 +375,10 @@ void ip6_register_protocol (u32 protocol, u32 node_index);
 
 serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
 
+void ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+                                   u32 sw_if_index,
+                                   u32 ai);
+
 int
 vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
                                 u32 sw_if_index,
index d48ccad..2487af9 100644 (file)
@@ -916,7 +916,7 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
 
   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
               t->fib_index, t->adj_index, format_ip_adjacency,
-              vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+              t->adj_index, FORMAT_IP_ADJACENCY_NONE,
              t->flow_hash);
   s = format (s, "\n%U%U",
               format_white_space, indent,
@@ -1605,6 +1605,13 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
          if (drop0)
            continue;
 
+         /*
+          * the adj has been updated to a rewrite but the DPO that got
+          * us here hasn't - yet. no big deal. we'll drop while we wait.
+          */
+         if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+           continue;
+
          {
            u32 bi0 = 0;
            icmp6_neighbor_solicitation_header_t * h0;
@@ -2167,10 +2174,6 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = {
   .format_trace = format_ip6_forward_next_trace,
 
   .sibling_of = "ip6-rewrite",
-
-  .next_nodes = {
-    [IP6_REWRITE_NEXT_DROP] = "error-drop",
-  },
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
index e042385..3aef232 100644 (file)
@@ -51,7 +51,7 @@ typedef struct {
 #define IP6_NEIGHBOR_FLAG_STATIC (1 << 0)
 #define IP6_NEIGHBOR_FLAG_DYNAMIC  (2 << 0)
   u64 cpu_time_last_updated;
-  adj_index_t adj_index;
+  fib_node_index_t fib_entry_index;
 } ip6_neighbor_t;
 
 /* advertised prefix option */ 
@@ -267,6 +267,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
        {
          n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]);
          mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+         fib_table_entry_delete_index (n->fib_entry_index,  FIB_SOURCE_ADJ);
          pool_put (nm->neighbor_pool, n);
        }
 
@@ -342,48 +343,182 @@ static void set_unset_ip6_neighbor_rpc
 #endif
 
+/**
+ * @brief Send one neighbor solicitation for the adjacency's next-hop.
+ *
+ * Takes a packet from the discover-neighbor template, fills in the
+ * source and target addresses and the sender's link-layer address option,
+ * recomputes the ICMP checksum, prepends the adjacency's rewrite and hands
+ * the buffer to the hardware interface's output node.
+ *
+ * Returns without sending when the interface is not admin-up, when
+ * ip6_interface_address_matching_destination() yields no source address,
+ * or when the template cannot supply a packet buffer.
+ *
+ * @param adj the adjacency to probe; its rewrite is applied to the packet.
+ */
 static void
-ip6_nd_mk_complete (ip6_neighbor_t * nbr)
+ip6_nbr_probe (ip_adjacency_t *adj)
 {
-  fib_prefix_t pfx = {
-      .fp_len = 128,
-      .fp_proto = FIB_PROTOCOL_IP6,
-      .fp_addr = {
-         .ip6 = nbr->key.ip6_address,
-      },
-  };
-  ip6_main_t *im;
-  u32 fib_index;
+  icmp6_neighbor_solicitation_header_t * h;
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_main_t * im = &ip6_main;
+  ip_interface_address_t * ia;
+  ip6_address_t * dst, *src;
+  vnet_hw_interface_t * hi;
+  vnet_sw_interface_t * si;
+  vlib_buffer_t * b;
+  int bogus_length;
+  vlib_main_t * vm;
+  u32 bi = 0;
+
+  vm = vlib_get_main();
 
-  im = &ip6_main;
-  fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
+  si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
+  dst = &adj->sub_type.nbr.next_hop.ip6;
 
-  /* only once please */
-  if (ADJ_INDEX_INVALID == nbr->adj_index)
+  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
     {
-      nbr->adj_index =
-         adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6,
-                                       FIB_LINK_IP6,
-                                       &pfx.fp_addr,
-                                       nbr->key.sw_if_index,
-                                       nbr->link_layer_address);
-      ASSERT(ADJ_INDEX_INVALID != nbr->adj_index);
-
-      fib_table_entry_update_one_path(fib_index,
-                                     &pfx,
-                                     FIB_SOURCE_ADJ,
-                                     FIB_ENTRY_FLAG_NONE,
-                                     FIB_PROTOCOL_IP6,
-                                     &pfx.fp_addr,
-                                     nbr->key.sw_if_index,
-                                     ~0,
-                                     1,
-                                     MPLS_LABEL_INVALID,
-                                     FIB_ROUTE_PATH_FLAG_NONE);
+      return;
+    }
+  src = ip6_interface_address_matching_destination(im, dst,
+                                                  adj->rewrite_header.sw_if_index,
+                                                  &ia);
+  if (! src)
+    {
+      return;
+    }
+
+  h = vlib_packet_template_get_packet(vm,
+                                     &im->discover_neighbor_packet_template,
+                                     &bi);
+  if (! h)
+    {
+      /* no buffer could be obtained for the probe; it is only speculative */
+      return;
+    }
+
+  hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
+
+  /* only the low three bytes of the template's destination are patched */
+  h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+  h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+  h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+  h->ip.src_address = src[0];
+  h->neighbor.target_address = dst[0];
+
+  clib_memcpy (h->link_layer_option.ethernet_address,
+              hi->hw_address,
+              vec_len(hi->hw_address));
+
+  h->neighbor.icmp.checksum =
+      ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
+  ASSERT(bogus_length == 0);
+
+  b = vlib_get_buffer (vm, bi);
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+      vnet_buffer (b)->sw_if_index[VLIB_TX] =
+      adj->rewrite_header.sw_if_index;
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
+  vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
+
+  {
+      vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
+      u32 * to_next = vlib_frame_vector_args(f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+      vlib_put_frame_to_node(vm, hi->output_node_index, f);
+  }
+}
+
+/**
+ * Make adjacency 'ai' complete: build an ethernet rewrite towards the
+ * neighbor's link-layer address, for the adjacency's own link type, and
+ * install it on the adjacency.
+ */
+static void
+ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+  u8 *rewrite;
+
+  rewrite = ethernet_build_rewrite (vnet_get_main (),
+                                    nbr->key.sw_if_index,
+                                    adj_get_link_type (ai),
+                                    nbr->link_layer_address);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+/**
+ * Make adjacency 'ai' incomplete: build an ethernet rewrite using the
+ * interface broadcast-address marker, for the adjacency's own link type,
+ * and install it on the adjacency.
+ */
+static void
+ip6_nd_mk_incomplete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+  u8 *rewrite;
+
+  rewrite =
+    ethernet_build_rewrite (vnet_get_main (),
+                            nbr->key.sw_if_index,
+                            adj_get_link_type (ai),
+                            VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_INCOMPLETE, rewrite);
+}
+
+/*
+ * Fill neighbor key _key from an interface index and a pointer to an IPv6
+ * address, zeroing the pad.
+ *
+ * The parameters are underscore-prefixed so that macro substitution cannot
+ * rewrite the key's member names (a parameter called sw_if_index would
+ * replace the member name too), and the body is a do/while(0) so the macro
+ * behaves as a single statement inside if/else.
+ */
+#define IP6_NBR_MK_KEY(_key, _sw_if_index, _addr) \
+do {                                              \
+    (_key).sw_if_index = (_sw_if_index);          \
+    (_key).ip6_address = *(_addr);                \
+    (_key).pad = 0;                               \
+} while (0)
+
+/**
+ * Look up the ND cache entry keyed by (sw_if_index, addr).
+ *
+ * @return the neighbor from the pool, or NULL when the key is not present
+ *         in the neighbor hash.
+ */
+static ip6_neighbor_t *
+ip6_nd_find (u32 sw_if_index,
+            const ip6_address_t * addr)
+{
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_neighbor_key_t k;
+  uword *slot;
+
+  IP6_NBR_MK_KEY(k, sw_if_index, addr);
+
+  slot = mhash_get (&nm->neighbor_index_by_key, &k);
+  if (NULL == slot)
+    return (NULL);
+
+  /* the hash stores the neighbor's index in the pool */
+  return (pool_elt_at_index (nm->neighbor_pool, slot[0]));
+}
+
+/**
+ * Adjacency walk callback: make the visited adjacency complete using the
+ * neighbor passed as the walk context. Always continues the walk.
+ */
+static adj_walk_rc_t
+ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+  ip6_nd_mk_complete (ai, (ip6_neighbor_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * Adjacency walk callback: make the visited adjacency incomplete using the
+ * neighbor passed as the walk context. Always continues the walk.
+ */
+static adj_walk_rc_t
+ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+  ip6_nd_mk_incomplete (ai, (ip6_neighbor_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Provide the rewrite for an IPv6 adjacency on an ethernet link.
+ *
+ * When an ND entry exists for the adjacency's next-hop, every adjacency
+ * for that next-hop on the interface is walked and made complete.
+ * Otherwise the adjacency is given an incomplete (broadcast) rewrite and
+ * one neighbor solicitation is sent.
+ *
+ * @param vnm          the vnet main
+ * @param sw_if_index  the interface used for the ND lookup and the rewrite
+ * @param ai           index of the adjacency to update
+ */
+void
+ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+                              u32 sw_if_index,
+                              u32 ai)
+{
+  ip6_neighbor_t *nbr;
+  ip_adjacency_t *adj;
+
+  adj = adj_get (ai);
+
+  nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
+
+  if (NULL != nbr)
+    {
+      adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+                       ip6_nd_mk_complete_walk, nbr);
     }
   else
     {
-      adj_nbr_update_rewrite(nbr->adj_index,
-                            nbr->link_layer_address);
+      /*
+       * no matching ND entry.
+       * construct the rewrite required for an ND packet, and stick
+       * that in the adj's pipe to smoke.
+       */
+      adj_nbr_update_rewrite (ai,
+                             ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+                             ethernet_build_rewrite (vnm,
+                                                     sw_if_index,
+                                                     VNET_LINK_IP6,
+                                                     VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+      /*
+       * since the FIB has added this adj for a route, it makes sense it may
+       * want to forward traffic sometime soon. Let's send a speculative ND.
+       * just one. If we were to do periodically that wouldn't be bad either,
+       * but that's more code than i'm prepared to write at this time for
+       * relatively little reward.
+       */
+      ip6_nbr_probe (adj);
     }
 }
 
@@ -416,8 +551,6 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
   k.ip6_address = a[0];
   k.pad = 0;
 
-  vlib_worker_thread_barrier_sync (vm);
-
   p = mhash_get (&nm->neighbor_index_by_key, &k);
   if (p) {
     n = pool_elt_at_index (nm->neighbor_pool, p[0]);
@@ -429,11 +562,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
   }
 
   if (make_new_nd_cache_entry) {
+      fib_prefix_t pfx = {
+         .fp_len = 128,
+         .fp_proto = FIB_PROTOCOL_IP6,
+         .fp_addr = {
+             .ip6 = k.ip6_address,
+         },
+      };
+      u32 fib_index;
+
     pool_get (nm->neighbor_pool, n);
     mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
                /* old value */ 0);
     n->key = k;
-    n->adj_index = ADJ_INDEX_INVALID;
+
+    clib_memcpy (n->link_layer_address,
+                link_layer_address,
+                n_bytes_link_layer_address);
+
+    /*
+     * create the adj-fib. the entry in the FIB table for and to the peer.
+     */
+    fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index];
+    n->fib_entry_index =
+       fib_table_entry_update_one_path(fib_index,
+                                       &pfx,
+                                       FIB_SOURCE_ADJ,
+                                       FIB_ENTRY_FLAG_NONE,
+                                       FIB_PROTOCOL_IP6,
+                                       &pfx.fp_addr,
+                                       n->key.sw_if_index,
+                                       ~0,
+                                       1,
+                                       MPLS_LABEL_INVALID,
+                                       FIB_ROUTE_PATH_FLAG_NONE);
   }
   else
   {
@@ -445,20 +607,22 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
                    link_layer_address,
                    n_bytes_link_layer_address))
       return -1;
-  }
 
-  /* Update time stamp and ethernet address. */
-  clib_memcpy (n->link_layer_address,
-              link_layer_address,
-              n_bytes_link_layer_address);
+    clib_memcpy (n->link_layer_address,
+                link_layer_address,
+                n_bytes_link_layer_address);
+  }
 
+  /* Update time stamp and flags. */
   n->cpu_time_last_updated = clib_cpu_time_now ();
   if (is_static)
     n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
   else
     n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
 
-  ip6_nd_mk_complete(n);
+  adj_nbr_walk_nh6 (sw_if_index,
+                   &n->key.ip6_address,
+                   ip6_nd_mk_complete_walk, n);
 
   /* Customer(s) waiting for this address to be resolved? */
   p = mhash_get (&nm->pending_resolutions_by_address, a);
@@ -507,44 +671,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
         }
     }
 
-  vlib_worker_thread_barrier_release(vm);
   return 0;
 }
 
-static void
-ip6_nd_mk_incomplete (ip6_neighbor_t *nbr)
-{
-  fib_prefix_t pfx = {
-      .fp_len = 128,
-      .fp_proto = FIB_PROTOCOL_IP6,
-      .fp_addr = {
-         .ip6 = nbr->key.ip6_address,
-      },
-  };
-  u32 fib_index;
-  ip6_main_t *im;
-
-  im = &ip6_main;
-  fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
-
-  /*
-   * revert the adj this ND entry sourced to incomplete
-   */
-  adj_nbr_update_rewrite(nbr->adj_index,
-                        NULL);
-
-  /*
-   * remove the FIB entry the ND entry sourced
-   */
-  fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ);
-
-  /*
-   * Unlock the adj now that the ARP entry is no longer a source
-   */
-  adj_unlock(nbr->adj_index);
-  nbr->adj_index = ADJ_INDEX_INVALID;
-}
-
 int
 vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
                                   u32 sw_if_index,
@@ -571,8 +700,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
   k.ip6_address = a[0];
   k.pad = 0;
   
-  vlib_worker_thread_barrier_sync (vm);
-  
   p = mhash_get (&nm->neighbor_index_by_key, &k);
   if (p == 0)
     {
@@ -582,12 +709,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
   
   n = pool_elt_at_index (nm->neighbor_pool, p[0]);
 
-  ip6_nd_mk_incomplete(n);
+  adj_nbr_walk_nh6 (sw_if_index,
+                   &n->key.ip6_address,
+                   ip6_nd_mk_incomplete_walk,
+                   n);
+
   mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+  fib_table_entry_delete_index (n->fib_entry_index,  FIB_SOURCE_ADJ);
   pool_put (nm->neighbor_pool, n);
   
  out:
-  vlib_worker_thread_barrier_release(vm);
   return rv;
 }
 
@@ -3725,11 +3856,9 @@ ethernet_ndp_change_mac (vlib_main_t * vm, u32 sw_if_index)
   pool_foreach (n, nm->neighbor_pool, ({
     if (n->key.sw_if_index == sw_if_index)
     {
-      if (ADJ_INDEX_INVALID != n->adj_index)
-        {
-          adj_nbr_update_rewrite(n->adj_index,
-               n->link_layer_address);
-        }
+       adj_nbr_walk_nh6 (sw_if_index,
+                         &n->key.ip6_address,
+                         ip6_nd_mk_complete_walk, n);
     }
   }));
   /* *INDENT-ON* */
index 13901ef..77d5d19 100644 (file)
@@ -38,24 +38,23 @@ dummy_interface_tx (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+/* *INDENT-OFF* */
 VNET_DEVICE_CLASS (ipsec_device_class, static) =
 {
-.name = "IPSec",.format_device_name = format_ipsec_name,.format_tx_trace =
-    format_ipsec_if_output_trace,.tx_function = dummy_interface_tx,};
-
-static uword
-dummy_set_rewrite (vnet_main_t * vnm,
-                  u32 sw_if_index,
-                  u32 l3_type,
-                  void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
-  return 0;
-}
-
+  .name = "IPSec",
+  .format_device_name = format_ipsec_name,
+  .format_tx_trace = format_ipsec_if_output_trace,
+  .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
 VNET_HW_INTERFACE_CLASS (ipsec_hw_class) =
 {
-.name = "IPSec",.set_rewrite = dummy_set_rewrite,};
-
+  .name = "IPSec",
+  .build_rewrite = default_build_rewrite,
+};
+/* *INDENT-ON* */
 
 static int
 ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
index a043483..13854e2 100644 (file)
@@ -271,21 +271,6 @@ VNET_DEVICE_CLASS (l2tpv3_device_class,static) = {
 };
 /* *INDENT-ON* */
 
-static uword
-dummy_set_rewrite (vnet_main_t * vnm,
-                  u32 sw_if_index,
-                  u32 l3_type,
-                  void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
-  /*
-   * Conundrum: packets from tun/tap destined for the tunnel
-   * actually have this rewrite applied. Transit packets do not.
-   * To make the two cases equivalent, don't generate a
-   * rewrite here, build the entire header in the fast path.
-   */
-  return 0;
-}
-
 static u8 *
 format_l2tp_header_with_length (u8 * s, va_list * args)
 {
@@ -298,7 +283,8 @@ format_l2tp_header_with_length (u8 * s, va_list * args)
 VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = {
   .name = "L2TPV3",
   .format_header = format_l2tp_header_with_length,
-  .set_rewrite = dummy_set_rewrite,
+  .build_rewrite = default_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 /* *INDENT-ON* */
 
index 1f527d6..04b8462 100644 (file)
@@ -698,7 +698,17 @@ ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2)
+/**
+ * Copy src into dst.
+ *
+ * An IPv4 source is copied field by field into a zeroed destination, so
+ * none of the unused bytes of the source are carried over; any other
+ * version is copied as a whole ip_address_t.
+ */
 void
 ip_address_copy (ip_address_t * dst, const ip_address_t * src)
 {
-  clib_memcpy (dst, src, sizeof (ip_address_t));
+  if (IP4 == ip_addr_version (src))
+    {
+      /* don't copy any garbage from the union */
+      memset (dst, 0, sizeof (*dst));
+      dst->ip.v4 = src->ip.v4;
+      dst->version = IP4;
+    }
+  else
+    {
+      clib_memcpy (dst, src, sizeof (ip_address_t));
+    }
 }
 
 void
@@ -714,6 +724,26 @@ ip_address_set (ip_address_t * dst, const void *src, u8 version)
   ip_addr_version (dst) = version;
 }
 
+/**
+ * Convert an ip_address_t into an ip46_address_t and report the matching
+ * FIB protocol: IPv4 addresses yield FIB_PROTOCOL_IP4, everything else
+ * yields FIB_PROTOCOL_IP6.
+ */
+void
+ip_address_to_46 (const ip_address_t * addr,
+                 ip46_address_t * a, fib_protocol_t * proto)
+{
+  if (IP4 == ip_addr_version (addr))
+    {
+      *proto = FIB_PROTOCOL_IP4;
+      ip46_address_set_ip4 (a, &addr->ip.v4);
+    }
+  else
+    {
+      *proto = FIB_PROTOCOL_IP6;
+      a->ip6 = addr->ip.v6;
+    }
+}
+
 static void
 ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen)
 {
index b37315e..dd7a53e 100644 (file)
@@ -186,6 +186,8 @@ u16 ip_address_size_to_write (ip_address_t * a);
 u16 ip_address_iana_afi (ip_address_t * a);
 u8 ip_address_max_len (u8 ver);
 u32 ip_address_put (u8 * b, ip_address_t * a);
+void ip_address_to_46 (const ip_address_t * addr,
+                      ip46_address_t * a, fib_protocol_t * proto);
 
 /* LISP AFI codes  */
 typedef enum
index 0b4f7ed..15d203c 100644 (file)
@@ -28,6 +28,7 @@
 #include <vnet/lisp-gpe/lisp_gpe.h>
 #include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
 #include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
 #include <vnet/adj/adj.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
@@ -167,13 +168,6 @@ VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
 };
 /* *INDENT-ON* */
 
-static uword
-dummy_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, u32 l3_type,
-                  void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
-  return 0;
-}
-
 u8 *
 format_lisp_gpe_header_with_length (u8 * s, va_list * args)
 {
@@ -200,7 +194,9 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args)
 VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
   .name = "LISP_GPE",
   .format_header = format_lisp_gpe_header_with_length,
-  .set_rewrite = dummy_set_rewrite,
+  .build_rewrite = default_build_rewrite,
+  .build_rewrite = lisp_gpe_build_rewrite,
+  .update_adjacency = lisp_gpe_update_adjacency,
 };
 /* *INDENT-ON* */
 
index d042f11..93a6d01 100644 (file)
@@ -18,7 +18,8 @@
  *
  */
 
-#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/lisp-cp/lisp_types.h>
 #include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
 #include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
 #include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
@@ -106,30 +107,99 @@ lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj)
   return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
 }
 
+/**
+ * Convert an ip46_address_t into an ip_address_t. For an IPv4 address the
+ * destination is zeroed before the address is set.
+ */
+static void
+ip46_address_to_ip_address (const ip46_address_t * a, ip_address_t * b)
+{
+  if (!ip46_address_is_ip4 (a))
+    {
+      ip_address_set (b, &a->ip6, IP6);
+      return;
+    }
+
+  memset (b, 0, sizeof (*b));
+  ip_address_set (b, &a->ip4, IP4);
+}
+
 /**
  * @brief Stack the tunnel's midchain on the IP forwarding chain of the via
  */
 static void
-lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+lisp_gpe_adj_stack_one (lisp_gpe_adjacency_t * ladj, adj_index_t ai)
 {
   const lisp_gpe_tunnel_t *lgt;
   dpo_id_t tmp = DPO_NULL;
-  fib_link_t linkt;
 
   lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
   fib_entry_contribute_forwarding (lgt->fib_entry_index,
                                   lisp_gpe_adj_get_fib_chain_type (ladj),
                                   &tmp);
 
-  FOR_EACH_FIB_LINK (linkt)
-  {
-    if (FIB_LINK_MPLS == linkt)
-      continue;
-    adj_nbr_midchain_stack (ladj->adjs[linkt], &tmp);
-  }
+  if (DPO_LOAD_BALANCE == tmp.dpoi_type)
+    {
+      /*
+       * post LISP rewrite we will load-balance. However, the LISP encap
+       * is always the same for this adjacency/tunnel and hence the IP/UDP src,dst
+       * hash is always the same result too. So we do that hash now and
+       * stack on the choice.
+       * If the choice is an incomplete adj then we will need a poke when
+       * it becomes complete. This happens since the adj update walk propagates
+       * as far as recursive paths.
+       */
+      const dpo_id_t *choice;
+      load_balance_t *lb;
+      int hash;
+
+      lb = load_balance_get (tmp.dpoi_index);
+
+      /* the adjacency's rewrite is hashed as the outer IP header */
+      if (IP4 == ip_addr_version (&ladj->remote_rloc))
+       {
+         hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+                                       lb->lb_hash_config);
+       }
+      else
+       {
+         hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+                                       lb->lb_hash_config);
+       }
+
+      /* replace the load-balance in tmp with the single bucket chosen */
+      choice =
+       load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+      dpo_copy (&tmp, choice);
+    }
+
+  adj_nbr_midchain_stack (ai, &tmp);
   dpo_reset (&tmp);
 }
 
+/**
+ * @brief Call back when restacking all adjacencies on a LISP-GPE interface.
+ * The walk context is the LISP adjacency; the walk always continues.
+ */
+static adj_walk_rc_t
+lisp_gpe_adj_walk_cb (adj_index_t ai, void *ctx)
+{
+  lisp_gpe_adj_stack_one ((lisp_gpe_adjacency_t *) ctx, ai);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Restack every FIB adjacency on the LISP adjacency's interface
+ * whose next-hop is the adjacency's remote RLOC.
+ */
+static void
+lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+{
+  ip46_address_t rloc46;
+  fib_protocol_t rloc_proto;
+
+  /* the walk takes the next-hop as an ip46 address plus its protocol */
+  ip_address_to_46 (&ladj->remote_rloc, &rloc46, &rloc_proto);
+
+  /*
+   * walk all the adjacencies on the lisp interface and restack them
+   */
+  adj_nbr_walk_nh (ladj->sw_if_index,
+                  rloc_proto, &rloc46, lisp_gpe_adj_walk_cb, ladj);
+}
+
 static lisp_gpe_next_protocol_e
 lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt)
 {
@@ -157,10 +227,59 @@ lisp_gpe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b)
   ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)));
 }
 
+/**
+ * @brief The LISP-GPE interface registered function to update, i.e.
+ * provide a rewrite string for, an adjacency.
+ *
+ * Looks up the LISP adjacency matching the FIB adjacency's next-hop on the
+ * interface, installs the tunnel rewrite for the adjacency's link type and
+ * stacks the adjacency.
+ */
+void
+lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+{
+  const lisp_gpe_tunnel_t *lgt;
+  lisp_gpe_adjacency_t *ladj;
+  vnet_link_t linkt;
+  ip_address_t rloc;
+  u8 *rewrite;
+  index_t lai;
+
+  /* the FIB adjacency's next-hop, converted to the LISP address type */
+  ip46_address_to_ip_address (&adj_get (ai)->sub_type.nbr.next_hop, &rloc);
+
+  /*
+   * look up the LISP adjacency for this address on this interface;
+   * it must already exist.
+   */
+  lai = lisp_adj_find (&rloc, sw_if_index);
+  ASSERT (INDEX_INVALID != lai);
+
+  ladj = pool_elt_at_index (lisp_adj_pool, lai);
+  lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+  linkt = adj_get_link_type (ai);
+
+  rewrite = lisp_gpe_tunnel_build_rewrite
+    (lgt, ladj, lisp_gpe_adj_proto_from_fib_link_type (linkt));
+
+  adj_nbr_midchain_update_rewrite
+    (ai, lisp_gpe_fixup,
+     (VNET_LINK_ETHERNET == linkt ?
+      ADJ_MIDCHAIN_FLAG_NO_COUNT :
+      ADJ_MIDCHAIN_FLAG_NONE),
+     rewrite);
+
+  lisp_gpe_adj_stack_one (ladj, ai);
+}
+
+/**
+ * @brief build_rewrite hook for the LISP-GPE interface class.
+ *
+ * Asserts unconditionally and returns NULL; none of the arguments are used.
+ * NOTE(review): presumably this is never expected to run because rewrites
+ * are installed by lisp_gpe_update_adjacency - confirm against callers.
+ */
+u8 *
+lisp_gpe_build_rewrite (vnet_main_t * vnm,
+                       u32 sw_if_index,
+                       vnet_link_t link_type, const void *dst_address)
+{
+  ASSERT (0);
+  return (NULL);
+}
+
 index_t
 lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
                                            u32 overlay_table_id, u32 vni)
 {
+  const lisp_gpe_sub_interface_t *l3s;
   const lisp_gpe_tunnel_t *lgt;
   lisp_gpe_adjacency_t *ladj;
   index_t lai, l3si;
@@ -171,29 +290,24 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
   l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc,
                                                         overlay_table_id,
                                                         vni);
+  l3s = lisp_gpe_sub_interface_get (l3si);
 
   /*
    * find an existing or create a new adj
    */
-  lai = lisp_adj_find (&pair->rmt_loc, l3si);
+  lai = lisp_adj_find (&pair->rmt_loc, l3s->sw_if_index);
 
   if (INDEX_INVALID == lai)
     {
-      const lisp_gpe_sub_interface_t *l3s;
-      u8 *rewrite = NULL;
-      fib_link_t linkt;
-      fib_prefix_t nh;
 
       pool_get (lisp_adj_pool, ladj);
       memset (ladj, 0, sizeof (*ladj));
       lai = (ladj - lisp_adj_pool);
 
-      ladj->remote_rloc = pair->rmt_loc;
+      ip_address_copy (&ladj->remote_rloc, &pair->rmt_loc);
       ladj->vni = vni;
       /* transfer the lock to the adj */
       ladj->lisp_l3_sub_index = l3si;
-
-      l3s = lisp_gpe_sub_interface_get (l3si);
       ladj->sw_if_index = l3s->sw_if_index;
 
       /* if vni is non-default */
@@ -219,38 +333,8 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
       ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index,
                                                         FIB_NODE_TYPE_LISP_ADJ,
                                                         lai);
-      ip_address_to_fib_prefix (&pair->rmt_loc, &nh);
-
-      /*
-       * construct and stack the FIB midchain adjacencies
-       */
-      FOR_EACH_FIB_LINK (linkt)
-      {
-       if (FIB_LINK_MPLS == linkt)
-         continue;
-
-       ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto,
-                                                linkt,
-                                                &nh.fp_addr,
-                                                ladj->sw_if_index);
-
-       rewrite =
-         lisp_gpe_tunnel_build_rewrite (lgt, ladj,
-                                        lisp_gpe_adj_proto_from_fib_link_type
-                                        (linkt));
 
-       adj_nbr_midchain_update_rewrite (ladj->adjs[linkt],
-                                        lisp_gpe_fixup,
-                                        (FIB_LINK_ETHERNET == linkt ?
-                                         ADJ_MIDCHAIN_FLAG_NO_COUNT :
-                                         ADJ_MIDCHAIN_FLAG_NONE), rewrite);
-
-       vec_free (rewrite);
-      }
-
-      lisp_gpe_adj_stack (ladj);
-
-      lisp_adj_insert (&ladj->remote_rloc, ladj->lisp_l3_sub_index, lai);
+      lisp_adj_insert (&ladj->remote_rloc, ladj->sw_if_index, lai);
     }
   else
     {
@@ -278,15 +362,21 @@ lisp_gpe_adjacency_from_fib_node (const fib_node_t * node)
 static void
 lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj)
 {
+  const lisp_gpe_tunnel_t *lgt;
+
   /*
    * no children so we are not counting locks. no-op.
    * at least not counting
    */
-  lisp_adj_remove (&ladj->remote_rloc, ladj->lisp_l3_sub_index);
+  lisp_adj_remove (&ladj->remote_rloc, ladj->sw_if_index);
 
   /*
    * unlock the resources this adj holds
    */
+  lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+
+  fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index);
+
   lisp_gpe_tunnel_unlock (ladj->tunnel_index);
   lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index);
 
@@ -375,9 +465,9 @@ format_lisp_gpe_adjacency (u8 * s, va_list * args)
       s = format (s, " %U\n",
                  format_lisp_gpe_tunnel,
                  lisp_gpe_tunnel_get (ladj->tunnel_index));
-      s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n",
-                 ladj->adjs[FIB_LINK_IP4],
-                 ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]);
+      /* s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n", */
+      /*                  ladj->adjs[FIB_LINK_IP4], */
+      /*                  ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]); */
     }
   else
     {
index f6a66cd..adc3aca 100644 (file)
@@ -79,12 +79,6 @@ typedef struct lisp_gpe_adjacency_t_
    */
   u32 tunnel_index;
 
-  /**
-   * Per-link-type FIB adjacencies contributed.
-   * These will be used as a result of a FIB lookup.
-   */
-  adj_index_t adjs[FIB_LINK_NUM];
-
   /**
    * This adjacency is a child of the FIB entry to reach the RLOC.
    * This is so when the reachability of that RLOC changes, we can restack
@@ -112,6 +106,14 @@ extern void lisp_gpe_adjacency_unlock (index_t l3si);
 
 extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t l3si);
 
+extern void lisp_gpe_update_adjacency (vnet_main_t * vnm,
+                                      u32 sw_if_index, adj_index_t ai);
+extern u8 *lisp_gpe_build_rewrite (vnet_main_t * vnm,
+                                  u32 sw_if_index,
+                                  vnet_link_t link_type,
+                                  const void *dst_address);
+
+
 /**
  * @brief Flags for displaying the adjacency
  */
index 3bd83dc..75db97d 100644 (file)
@@ -190,26 +190,6 @@ ip_src_fib_add_route_w_dpo (u32 src_fib_index,
     }
 }
 
-static void
-ip_address_to_46 (const ip_address_t * addr,
-                 ip46_address_t * a, fib_protocol_t * proto)
-{
-  *proto = (IP4 == ip_addr_version (addr) ?
-           FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
-  switch (*proto)
-    {
-    case FIB_PROTOCOL_IP4:
-      a->ip4 = addr->ip.v4;
-      break;
-    case FIB_PROTOCOL_IP6:
-      a->ip6 = addr->ip.v6;
-      break;
-    default:
-      ASSERT (0);
-      break;
-    }
-}
-
 static fib_route_path_t *
 lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths)
 {
index 0496e8d..975207b 100644 (file)
@@ -155,31 +155,30 @@ unformat_llc_header (unformat_input_t * input, va_list * args)
   return 1;
 }
 
-static uword
-llc_set_rewrite (vnet_main_t * vnm,
-                u32 sw_if_index,
-                u32 l3_type,
-                void *dst_address, void *rewrite, uword max_rewrite_bytes)
+/**
+ * @brief Build the LLC rewrite for a link type.
+ *
+ * @return a vector sized to hold one llc_header_t, with source and
+ *         destination SAP set to the LLC protocol for the link type and
+ *         control set to 0x3; NULL for any link type other than IP4.
+ */
+static u8 *
+llc_build_rewrite (vnet_main_t * vnm,
+                  u32 sw_if_index,
+                  vnet_link_t link_type, const void *dst_address)
 {
-  llc_header_t *h = rewrite;
+  llc_header_t *h;
+  u8 *rewrite = NULL;
   llc_protocol_t protocol;
 
-  if (max_rewrite_bytes < sizeof (h[0]))
-    return 0;
-
-  switch (l3_type)
+  switch (link_type)
     {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = LLC_PROTOCOL_##b; break
+#define _(a,b) case VNET_LINK_##a: protocol = LLC_PROTOCOL_##b; break
       _(IP4, ip4);
 #undef _
     default:
-      return 0;
+      return (NULL);
     }
 
+  /* grow the empty vector to exactly sizeof (llc_header_t) bytes */
+  vec_validate (rewrite, sizeof (*h) - 1);
+  h = (llc_header_t *) rewrite;
   h->src_sap = h->dst_sap = protocol;
   h->control = 0x3;
 
-  return sizeof (h[0]);
+  return (rewrite);
 }
 
 /* *INDENT-OFF* */
@@ -187,7 +186,7 @@ VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
   .name = "LLC",
   .format_header = format_llc_header_with_length,
   .unformat_header = unformat_llc_header,
-  .set_rewrite = llc_set_rewrite,
+  .build_rewrite = llc_build_rewrite,
 };
 /* *INDENT-ON* */
 
index dd40fc2..553d7fb 100644 (file)
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/dpo/classify_dpo.h>
 
-static uword mpls_gre_set_rewrite (vnet_main_t * vnm,
-                              u32 sw_if_index,
-                              u32 l3_type,
-                              void * dst_address,
-                              void * rewrite,
-                              uword max_rewrite_bytes)
-{
-  /*
-   * Conundrum: packets from tun/tap destined for the tunnel
-   * actually have this rewrite applied. Transit packets do not.
-   * To make the two cases equivalent, don't generate a
-   * rewrite here, build the entire header in the fast path.
-   */
-  return 0;
-}
-
 /* manually added to the interface output node */
 #define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE      1
 
@@ -271,26 +255,10 @@ VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = {
 #if 0
   .unformat_header = unformat_mpls_gre_header,
 #endif
-  .set_rewrite = mpls_gre_set_rewrite,
+  .build_rewrite = default_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
-
-static uword mpls_eth_set_rewrite (vnet_main_t * vnm,
-                              u32 sw_if_index,
-                              u32 l3_type,
-                              void * dst_address,
-                              void * rewrite,
-                              uword max_rewrite_bytes)
-{
-  /*
-   * Conundrum: packets from tun/tap destined for the tunnel
-   * actually have this rewrite applied. Transit packets do not.
-   * To make the two cases equivalent, don't generate a
-   * rewrite here, build the entire header in the fast path.
-   */
-  return 0;
-}
-
 /* manually added to the interface output node */
 #define MPLS_ETH_OUTPUT_NEXT_OUTPUT    1
 
@@ -525,7 +493,8 @@ VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = {
 #if 0
   .unformat_header = unformat_mpls_eth_header,
 #endif
-  .set_rewrite = mpls_eth_set_rewrite,
+  .build_rewrite = default_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 /**
@@ -609,7 +578,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
                                   u32 sw_if_index,
                                   u8 is_enable)
 {
-  mpls_interface_state_change_callback_t *callback;
   vlib_main_t * vm = vlib_get_main();
   ip_config_main_t * cm = &mm->feature_config_mains[VNET_IP_RX_UNICAST_FEAT];
   vnet_config_main_t * vcm = &cm->config_main;
@@ -660,14 +628,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
                                   /* # bytes of config data */ 0);
 
   cm->config_index_by_sw_if_index[sw_if_index] = ci;
-
-  /*
-   * notify all interested clients of the change of state.
-   */
-  vec_foreach(callback, mm->mpls_interface_state_change_callbacks)
-  {
-      (*callback)(sw_if_index, is_enable);
-  }
 }
 
 static mpls_gre_tunnel_t *
@@ -1578,7 +1538,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
   
   vnet_rewrite_for_sw_interface
     (vnm,
-     VNET_L3_PACKET_TYPE_MPLS_UNICAST
+     VNET_LINK_MPLS
      tx_sw_if_index,
      ip4_rewrite_node.index,
      tp->tunnel_dst,
@@ -1763,7 +1723,7 @@ int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm,
   /* Build L2 encap */
   vnet_rewrite_for_sw_interface
     (mm->vnet_main, 
-     VNET_L3_PACKET_TYPE_MPLS_UNICAST
+     VNET_LINK_MPLS
      t->tx_sw_if_index,
      mpls_policy_encap_node.index,
      t->tunnel_dst,
index e473770..3575533 100644 (file)
@@ -163,9 +163,6 @@ typedef struct {
   /* IP4 enabled count by software interface */
   u8 * mpls_enabled_by_sw_if_index;
 
-  /* Functions to call when MPLS state on an interface changes. */
-  mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks;
-
   /* convenience */
   vlib_main_t * vlib_main;
   vnet_main_t * vnet_main;
index 932fcb8..299e1dd 100644 (file)
@@ -39,7 +39,7 @@ format_mpls_output_trace (u8 * s, va_list * args)
 
   s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
               t->adj_index,
-              format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+              format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
              t->flow_hash);
   s = format (s, "\n%U%U",
               format_white_space, indent,
index b66fb74..20f54a7 100644 (file)
@@ -121,25 +121,25 @@ VNET_DEVICE_CLASS (pg_dev_class) = {
 };
 /* *INDENT-ON* */
 
-static uword
-pg_set_rewrite (vnet_main_t * vnm,
-               u32 sw_if_index,
-               u32 l3_type,
-               void *dst_address, void *rewrite, uword max_rewrite_bytes)
+static u8 *
+pg_build_rewrite (vnet_main_t * vnm,
+                 u32 sw_if_index,
+                 vnet_link_t link_type, const void *dst_address)
 {
-  u16 *h = rewrite;
+  u8 *rewrite = NULL;
+  u16 *h;
 
-  if (max_rewrite_bytes < sizeof (h[0]))
-    return 0;
+  vec_validate (rewrite, sizeof (*h) - 1);
+  h = (u16 *) rewrite;
+  h[0] = clib_host_to_net_u16 (vnet_link_to_l3_proto (link_type));
 
-  h[0] = clib_host_to_net_u16 (l3_type);
-  return sizeof (h[0]);
+  return (rewrite);
 }
 
 /* *INDENT-OFF* */
 VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
   .name = "Packet generator",
-  .set_rewrite = pg_set_rewrite,
+  .build_rewrite = pg_build_rewrite,
 };
 /* *INDENT-ON* */
 
index 427fd7b..a0eefba 100644 (file)
@@ -168,35 +168,33 @@ unformat_ppp_header (unformat_input_t * input, va_list * args)
   return 1;
 }
 
-static uword
-ppp_set_rewrite (vnet_main_t * vnm,
-                u32 sw_if_index,
-                u32 l3_type,
-                void *dst_address, void *rewrite, uword max_rewrite_bytes)
+static u8 *
+ppp_build_rewrite (vnet_main_t * vnm,
+                  u32 sw_if_index,
+                  vnet_link_t link_type, const void *dst_hw_address)
 {
-  ppp_header_t *h = rewrite;
+  ppp_header_t *h;
+  u8 *rewrite = NULL;
   ppp_protocol_t protocol;
 
-  if (max_rewrite_bytes < sizeof (h[0]))
-    return 0;
-
-  switch (l3_type)
+  switch (link_type)
     {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = PPP_PROTOCOL_##b; break
+#define _(a,b) case VNET_LINK_##a: protocol = PPP_PROTOCOL_##b; break
       _(IP4, ip4);
       _(IP6, ip6);
-      _(MPLS_UNICAST, mpls_unicast);
-      _(MPLS_MULTICAST, mpls_multicast);
+      _(MPLS, mpls_unicast);
 #undef _
     default:
-      return 0;
+      return (NULL);
     }
 
+  vec_validate (rewrite, sizeof (*h) - 1);
+  h = (ppp_header_t *) rewrite;
   h->address = 0xff;
   h->control = 0x03;
   h->protocol = clib_host_to_net_u16 (protocol);
 
-  return sizeof (h[0]);
+  return (rewrite);
 }
 
 /* *INDENT-OFF* */
@@ -204,7 +202,8 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
   .name = "PPP",
   .format_header = format_ppp_header_with_length,
   .unformat_header = unformat_ppp_header,
-  .set_rewrite = ppp_set_rewrite,
+  .build_rewrite = ppp_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 /* *INDENT-ON* */
 
index 571be7d..561c86c 100644 (file)
@@ -225,6 +225,12 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl)
          /* Mark that this buffer was just recycled */
          b0->flags |= VLIB_BUFFER_IS_RECYCLED;
 
+#if (CLIB_DEBUG > 0)
+#if DPDK == 0
+         vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED);
+#endif
+#endif
+
          /* If buffer is traced, mark frame as traced */
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            f->flags |= VLIB_FRAME_TRACE;
index 42d0688..53d548b 100644 (file)
@@ -184,38 +184,51 @@ done:
   return error == 0;
 }
 
+u32
+vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  return (hw->output_node_index);
+}
+
+void
+vnet_rewrite_init (vnet_main_t * vnm,
+                  u32 sw_if_index,
+                  u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
+{
+  rw->sw_if_index = sw_if_index;
+  rw->node_index = this_node;
+  rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
+  rw->max_l3_packet_bytes =
+    vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+}
+
 void
 vnet_rewrite_for_sw_interface (vnet_main_t * vnm,
-                              vnet_l3_packet_type_t packet_type,
+                              vnet_link_t link_type,
                               u32 sw_if_index,
                               u32 node_index,
                               void *dst_address,
                               vnet_rewrite_header_t * rw,
                               u32 max_rewrite_bytes)
 {
+
   vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
   vnet_hw_interface_class_t *hc =
     vnet_get_hw_interface_class (vnm, hw->hw_class_index);
-  static u8 *rw_tmp = 0;
-  uword n_rw_tmp;
+  u8 *rewrite = NULL;
 
-  rw->sw_if_index = sw_if_index;
-  rw->node_index = node_index;
-  rw->next_index =
-    vlib_node_add_next (vnm->vlib_main, node_index, hw->output_node_index);
-  rw->max_l3_packet_bytes = hw->max_l3_packet_bytes[VLIB_TX];
-
-  ASSERT (max_rewrite_bytes > 0);
-  vec_reset_length (rw_tmp);
-  vec_validate (rw_tmp, max_rewrite_bytes - 1);
-
-  ASSERT (hc->set_rewrite);
-  n_rw_tmp =
-    hc->set_rewrite (vnm, sw_if_index, packet_type, dst_address, rw_tmp,
-                    max_rewrite_bytes);
-
-  ASSERT (n_rw_tmp < max_rewrite_bytes);
-  vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rw_tmp, n_rw_tmp);
+  vnet_rewrite_init (vnm, sw_if_index, node_index,
+                    vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
+                    rw);
+
+  ASSERT (hc->build_rewrite);
+  rewrite = hc->build_rewrite (vnm, sw_if_index, link_type, dst_address);
+
+  ASSERT (vec_len (rewrite) < max_rewrite_bytes);
+  vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite,
+                                 vec_len (rewrite));
+  vec_free (rewrite);
 }
 
 void
@@ -280,6 +293,33 @@ unserialize_vnet_rewrite (serialize_main_t * m, va_list * va)
               rw->data_bytes);
 }
 
+u8 *
+vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm,
+                                    u32 sw_if_index,
+                                    vnet_link_t link_type,
+                                    const void *dst_address)
+{
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  vnet_hw_interface_class_t *hc =
+    vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+  ASSERT (hc->build_rewrite);
+  return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address));
+}
+
+
+void
+vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm,
+                                       u32 sw_if_index, u32 ai)
+{
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  vnet_hw_interface_class_t *hc =
+    vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+  ASSERT (hc->update_adjacency);
+  hc->update_adjacency (vnm, sw_if_index, ai);
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index fb800da..00c1efb 100644 (file)
@@ -88,6 +88,16 @@ struct {                                                             \
   u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)];     \
 }
 
+always_inline void
+vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+  /* Sanity check values carefully for this memset operation */
+  ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+
+  rw->data_bytes = 0;
+  memset (rw->data, 0xfe, max_size);
+}
+
 always_inline void
 vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
                                int max_size, void *data, int data_bytes)
@@ -252,20 +262,29 @@ _vnet_rewrite_two_headers (vnet_rewrite_header_t * h0,
                             (most_likely_size))
 
 #define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0)
+/** Deprecated */
 void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm,
-                                   vnet_l3_packet_type_t packet_type,
+                                   vnet_link_t packet_type,
                                    u32 sw_if_index,
                                    u32 node_index,
                                    void *dst_address,
                                    vnet_rewrite_header_t * rw,
                                    u32 max_rewrite_bytes);
 
-void vnet_rewrite_for_tunnel (struct vnet_main_t *vnm,
-                             u32 tx_sw_if_index,
-                             u32 rewrite_node_index,
-                             u32 post_rewrite_node_index,
-                             vnet_rewrite_header_t * rw,
-                             u8 * rewrite_data, u32 rewrite_length);
+u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm,
+                                        u32 sw_if_index);
+
+void vnet_rewrite_init (struct vnet_main_t *vnm,
+                       u32 sw_if_index,
+                       u32 this_node,
+                       u32 next_node, vnet_rewrite_header_t * rw);
+
+u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+                                        u32 sw_if_index,
+                                        vnet_link_t packet_type,
+                                        const void *dst_address);
+void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm,
+                                            u32 sw_if_index, u32 ai);
 
 /* Parser for unformat header & rewrite string. */
 unformat_function_t unformat_vnet_rewrite;
index 46c091d..d427cc3 100644 (file)
 #include <vnet/pg/pg.h>
 #include <vnet/srp/srp.h>
 
-static uword srp_set_rewrite (vnet_main_t * vnm,
-                             u32 sw_if_index,
-                             u32 l3_type,
-                             void * dst_address,
-                             void * rewrite,
-                             uword max_rewrite_bytes)
+static u8*
+srp_build_rewrite (vnet_main_t * vnm,
+                  u32 sw_if_index,
+                  vnet_link_t link_type,
+                  const void * dst_address)
 {
   vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
   srp_main_t * sm = &srp_main;
-  srp_and_ethernet_header_t * h = rewrite;
+  srp_and_ethernet_header_t * h;
+  u8* rewrite = NULL;
   u16 type;
   uword n_bytes = sizeof (h[0]);
 
-  if (n_bytes > max_rewrite_bytes)
-    return 0;
-
-  switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break
+  switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
     _ (IP4, IP4);
     _ (IP6, IP6);
-    _ (MPLS_UNICAST, MPLS_UNICAST);
-    _ (MPLS_MULTICAST, MPLS_MULTICAST);
+    _ (MPLS, MPLS_UNICAST);
     _ (ARP, ARP);
 #undef _
   default:
-    return 0;
+      return (NULL);
   }
 
+  vec_validate(rewrite, n_bytes-1);
+  h = (srp_and_ethernet_header_t *)rewrite;
+
   clib_memcpy (h->ethernet.src_address, hw->hw_address, sizeof (h->ethernet.src_address));
   if (dst_address)
     clib_memcpy (h->ethernet.dst_address, dst_address, sizeof (h->ethernet.dst_address));
@@ -82,7 +81,7 @@ static uword srp_set_rewrite (vnet_main_t * vnm,
   h->srp.ttl = sm->default_data_ttl;
   srp_header_compute_parity (&h->srp);
 
-  return n_bytes;
+  return (rewrite);
 }
 
 static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute);
@@ -293,7 +292,8 @@ VNET_HW_INTERFACE_CLASS (srp_hw_interface_class) = {
   .format_device = format_srp_device,
   .unformat_hw_address = unformat_ethernet_address,
   .unformat_header = unformat_srp_header,
-  .set_rewrite = srp_set_rewrite,
+  .build_rewrite = srp_build_rewrite,
+  .update_adjacency = ethernet_update_adjacency,
   .is_valid_class_for_interface = srp_is_valid_class_for_interface,
   .hw_class_change = srp_interface_hw_class_change,
 };
index 0be68a9..8a5d47d 100644 (file)
@@ -585,6 +585,7 @@ tapcli_nopunt_frame (vlib_main_t * vm,
 
 VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = {
   .name = "tapcli",
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 /**
index 48d5dc2..89fd1dc 100644 (file)
@@ -946,6 +946,7 @@ tuntap_nopunt_frame (vlib_main_t * vm,
 
 VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
   .name = "tuntap",
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 /**
index fae481c..979864e 100644 (file)
@@ -121,17 +121,6 @@ VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
   .admin_up_down_function = vxlan_gpe_interface_admin_up_down,
 };
 
-static uword dummy_set_rewrite (vnet_main_t * vnm,
-                                u32 sw_if_index,
-                                u32 l3_type,
-                                void * dst_address,
-                                void * rewrite,
-                                uword max_rewrite_bytes)
-{
-  return 0;
-}
-
-
 /**
  * @brief Formatting function for tracing VXLAN GPE with length
  *
@@ -151,7 +140,8 @@ static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
 VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
   .name = "VXLAN_GPE",
   .format_header = format_vxlan_gpe_header_with_length,
-  .set_rewrite = dummy_set_rewrite,
+  .build_rewrite = default_build_rewrite,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
 };
 
 
index da359a8..5b521db 100644 (file)
@@ -103,16 +103,6 @@ VNET_DEVICE_CLASS (vxlan_device_class,static) = {
   .admin_up_down_function = vxlan_interface_admin_up_down,
 };
 
-static uword dummy_set_rewrite (vnet_main_t * vnm,
-                                u32 sw_if_index,
-                                u32 l3_type,
-                                void * dst_address,
-                                void * rewrite,
-                                uword max_rewrite_bytes)
-{
-  return 0;
-}
-
 static u8 * format_vxlan_header_with_length (u8 * s, va_list * args)
 {
   u32 dev_instance = va_arg (*args, u32);
@@ -123,7 +113,7 @@ static u8 * format_vxlan_header_with_length (u8 * s, va_list * args)
 VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
   .name = "VXLAN",
   .format_header = format_vxlan_header_with_length,
-  .set_rewrite = dummy_set_rewrite,
+  .build_rewrite = default_build_rewrite,
 };
 
 #define foreach_copy_field                      \
index ca56ab2..b0e0412 100644 (file)
@@ -3211,7 +3211,9 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname,
   mp->hostname[vec_len (hostname) + 1] = '\n';
   clib_memcpy (&mp->host_address[0], host_address, 16);
   clib_memcpy (&mp->router_address[0], router_address, 16);
-  clib_memcpy (&mp->host_mac[0], host_mac, 6);
+
+  if (NULL != host_mac)
+    clib_memcpy (&mp->host_mac[0], host_mac, 6);
 
   mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT);