X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fadj%2Fadj.h;h=b29b1b0c225c73f37e2f52875d788530bf425bcf;hb=e2fe09742;hp=fcc5890c7d70fcae802e79f023746755a1f8c4e5;hpb=9c6a613feb2d718c5756cecbcd3ab84156241db2;p=vpp.git diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index fcc5890c7d7..b29b1b0c225 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -23,8 +23,7 @@ * 'glean' mean use the packet's destination address as the target * address in the ARP packet. * UNSHARED. Only one per-interface. - * - midchain: a nighbour adj on a virtual/tunnel interface. - * - rewrite: an adj with no key, but with a rewrite string. + * - midchain: a neighbour adj on a virtual/tunnel interface. * * The API to create and update the adjacency is very sub-type specific. This * is intentional as it encourages the user to carefully consider which adjacency @@ -34,18 +33,338 @@ * is important to enforce this approach as space in the adjacency is a premium, * as we need it to fit in 1 cache line. * - * the API is also based around an index to an ajdacency not a raw pointer. This + * the API is also based around an index to an adjacency not a raw pointer. This * is so the user doesn't suffer the same limp inducing firearm injuries that - * the author suffered as the adjacenices can realloc. + * the author suffered as the adjacencies can realloc. */ #ifndef __ADJ_H__ #define __ADJ_H__ -#include #include #include #include +#include + +/** @brief Common (IP4/IP6) next index stored in adjacency. */ +typedef enum +{ + /** Adjacency to drop this packet. */ + IP_LOOKUP_NEXT_DROP, + /** Adjacency to punt this packet. */ + IP_LOOKUP_NEXT_PUNT, + + /** This packet is for one of our own IP addresses. */ + IP_LOOKUP_NEXT_LOCAL, + + /** This packet matches an "incomplete adjacency" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_ARP, + + /** This packet matches an "interface route" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_GLEAN, + + /** This packet is to be rewritten and forwarded to the next + processing node. This is typically the output interface but + might be another node for further output processing. */ + IP_LOOKUP_NEXT_REWRITE, + + /** This packets follow a mid-chain adjacency */ + IP_LOOKUP_NEXT_MIDCHAIN, + + /** This packets needs to go to ICMP error */ + IP_LOOKUP_NEXT_ICMP_ERROR, + + /** Multicast Adjacency. */ + IP_LOOKUP_NEXT_MCAST, + + /** Broadcast Adjacency. */ + IP_LOOKUP_NEXT_BCAST, + + /** Multicast Midchain Adjacency. An Adjacency for sending multicast packets + * on a tunnel/virtual interface */ + IP_LOOKUP_NEXT_MCAST_MIDCHAIN, + + IP_LOOKUP_N_NEXT, +} __attribute__ ((packed)) ip_lookup_next_t; + +typedef enum +{ + IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT, +} ip4_lookup_next_t; + +typedef enum +{ + /* Hop-by-hop header handling */ + IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT, + IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP, + IP6_LOOKUP_NEXT_POP_HOP_BY_HOP, + IP6_LOOKUP_N_NEXT, +} ip6_lookup_next_t; + +#define IP4_LOOKUP_NEXT_NODES { \ + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \ + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \ + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \ + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ + [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \ + [IP_LOOKUP_NEXT_BCAST] = "ip4-rewrite-bcast", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \ + [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ +} + +#define IP6_LOOKUP_NEXT_NODES { \ + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \ + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \ + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \ + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \ + [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ + [IP_LOOKUP_NEXT_BCAST] = "ip6-rewrite-bcast", \ + [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \ + [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ + [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ + [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ + [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \ +} + +/** + * The special broadcast address (to construct a broadcast adjacency + */ +extern const ip46_address_t ADJ_BCAST_ADDR; + +/** + * Forward declaration + */ +struct ip_adjacency_t_; + +/** + * @brief A function type for post-rewrite fixups on midchain adjacency + */ +typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm, + const struct ip_adjacency_t_ * adj, + vlib_buffer_t * b0, + const void *data); + +/** + * @brief Flags on an IP adjacency + */ +typedef enum adj_attr_t_ +{ + /** + * Currently a sync walk is active. Used to prevent re-entrant walking + */ + ADJ_ATTR_SYNC_WALK_ACTIVE = 0, + + /** + * Packets TX through the midchain do not increment the interface + * counters. This should be used when the adj is associated with an L2 + * interface and that L2 interface is in a bridge domain. In that case + * the packet will have traversed the interface's TX node, and hence have + * been counted, before it traverses ths midchain + */ + ADJ_ATTR_MIDCHAIN_NO_COUNT, + /** + * When stacking midchains on a fib-entry extract the choice from the + * load-balance returned based on an IP hash of the adj's rewrite + */ + ADJ_ATTR_MIDCHAIN_IP_STACK, + /** + * If the midchain were to stack on its FIB entry a loop would form. + */ + ADJ_ATTR_MIDCHAIN_LOOPED, + /** + * the fixup function is standard IP4o4 header + */ + ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR, +} adj_attr_t; + +#define ADJ_ATTR_NAMES { \ + [ADJ_ATTR_SYNC_WALK_ACTIVE] = "walk-active", \ + [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count", \ + [ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack", \ + [ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped", \ + [ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup", \ +} + +#define FOR_EACH_ADJ_ATTR(_attr) \ + for (_attr = ADJ_ATTR_SYNC_WALK_ACTIVE; \ + _attr <= ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR; \ + _attr++) + +/** + * @brief Flags on an IP adjacency + */ +typedef enum adj_flags_t_ +{ + ADJ_FLAG_NONE = 0, + ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << ADJ_ATTR_SYNC_WALK_ACTIVE), + ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT), + ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK), + ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED), + ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR), +} __attribute__ ((packed)) adj_flags_t; + +/** + * @brief Format adjacency flags + */ +extern u8* format_adj_flags(u8 * s, va_list * args); + +/** + * @brief IP unicast adjacency. + * @note cache aligned. + * + * An adjacency is a representation of a peer on a particular link. + */ +typedef struct ip_adjacency_t_ +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** + * Linkage into the FIB node graph. First member since this type + * has 8 byte alignment requirements. + */ + fib_node_t ia_node; + /** + * feature [arc] config index + */ + u32 ia_cfg_index; + + union + { + /** + * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE + * + * neighbour adjacency sub-type; + */ + struct + { + ip46_address_t next_hop; + } nbr; + /** + * IP_LOOKUP_NEXT_MIDCHAIN + * + * A nbr adj that is also recursive. Think tunnels. + * A nbr adj can transition to be of type MIDCHAIN + * so be sure to leave the two structs with the next_hop + * fields aligned. + */ + struct + { + /** + * The recursive next-hop. + * This field MUST be at the same memory location as + * sub_type.nbr.next_hop + */ + ip46_address_t next_hop; + /** + * The next DPO to use + */ + dpo_id_t next_dpo; + /** + * A function to perform the post-rewrite fixup + */ + adj_midchain_fixup_t fixup_func; + /** + * Fixup data passed back to the client in the fixup function + */ + const void *fixup_data; + /** + * the FIB entry this midchain resolves through. required for recursive + * loop detection. + */ + fib_node_index_t fei; + + /** spare space */ + u8 __ia_midchain_pad[4]; + + } midchain; + /** + * IP_LOOKUP_NEXT_GLEAN + * + * Glean the address to ARP for from the packet's destination. + * Technically these aren't adjacencies, i.e. they are not a + * representation of a peer. One day we might untangle this coupling + * and use a new Glean DPO. + */ + struct + { + fib_prefix_t rx_pfx; + } glean; + } sub_type; + + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + /** Rewrite in second and third cache lines */ + VNET_DECLARE_REWRITE; + + /** + * more control plane members that do not fit on the first cacheline + */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline3); + + /** + * A sorted vector of delegates + */ + struct adj_delegate_t_ *ia_delegates; + + /** + * The VLIB node in which this adj is used to forward packets + */ + u32 ia_node_index; + + /** + * Next hop after ip4-lookup. + * This is not accessed in the rewrite nodes. + * 1-bytes + */ + ip_lookup_next_t lookup_next_index; + + /** + * link/ether-type + * 1 bytes + */ + vnet_link_t ia_link; + + /** + * The protocol of the neighbor/peer. i.e. the protocol with + * which to interpret the 'next-hop' attributes of the sub-types. + * 1-bytes + */ + fib_protocol_t ia_nh_proto; + + /** + * Flags on the adjacency + * 1-bytes + */ + adj_flags_t ia_flags; + + /** + * Free space on the fourth cacheline (not used in the DP) + */ + u8 __ia_pad[48]; +} ip_adjacency_t; + +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0), + "IP adjacency cacheline 0 is not offset"); +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) == + CLIB_CACHE_LINE_BYTES), + "IP adjacency cacheline 1 is more than one cacheline size offset"); +#if defined __x86_64__ +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline3) == + 3 * CLIB_CACHE_LINE_BYTES), + "IP adjacency cacheline 3 is more than one cacheline size offset"); +/* An adj fits into 4 cachelines on your average machine */ +STATIC_ASSERT_SIZEOF (ip_adjacency_t, 4 * 64); +#endif /** * @brief @@ -61,7 +380,7 @@ extern void adj_unlock(adj_index_t adj_index); /** * @brief * Add a child dependent to an adjacency. The child will - * thus be informed via its registerd back-walk function + * thus be informed via its registered back-walk function * when the adjacency state changes. */ extern u32 adj_child_add(adj_index_t adj_index, @@ -91,14 +410,32 @@ extern vnet_link_t adj_get_link_type (adj_index_t ai); */ extern u32 adj_get_sw_if_index (adj_index_t ai); +/** + * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding. + * 0 is down, !0 is up. + */ +extern int adj_is_up (adj_index_t ai); + /** * @brief Return the link type of the adjacency */ extern const u8* adj_get_rewrite (adj_index_t ai); +/** + * @brief descend the FIB graph looking for loops + * + * @param ai + * The adj index to traverse + * + * @param entry_indicies) + * A pointer to a vector of FIB entries already visited. + */ +extern int adj_recursive_loop_detect (adj_index_t ai, + fib_node_index_t **entry_indicies); + /** * @brief - * The global adjacnecy pool. Exposed for fast/inline data-plane access + * The global adjacency pool. Exposed for fast/inline data-plane access */ extern ip_adjacency_t *adj_pool; @@ -121,7 +458,13 @@ extern int adj_per_adj_counters; static inline ip_adjacency_t * adj_get (adj_index_t adj_index) { - return (vec_elt_at_index(adj_pool, adj_index)); + return (pool_elt_at_index(adj_pool, adj_index)); +} + +static inline int +adj_is_valid(adj_index_t adj_index) +{ + return !(pool_is_free_index(adj_pool, adj_index)); } /**