Load Balancer: Use FIB 2.0
[vpp.git] / plugins / lb-plugin / lb / node.c
index c51a210..77beaac 100644 (file)
@@ -36,28 +36,11 @@ static char *lb_error_strings[] = {
 #undef _
 };
 
-typedef enum {
-  LB_NEXT_LOOKUP,
-  LB_NEXT_REWRITE,
-  LB_NEXT_DROP,
-  LB_N_NEXT,
-} lb_next_t;
-
 typedef struct {
   u32 vip_index;
   u32 as_index;
 } lb_trace_t;
 
-/* u8 *lb_format_adjacency(u8 * s, va_list * va) */
-/* { */
-/*   lb_main_t *lbm = &lb_main; */
-/*   __attribute((unused)) ip_lookup_main_t *lm = va_arg (*va, ip_lookup_main_t *); */
-/*   ip_adjacency_t *adj = va_arg (*va, ip_adjacency_t *); */
-/*   lb_adj_data_t *ad = (lb_adj_data_t *) &adj->opaque; */
-/*   __attribute__((unused)) lb_vip_t *vip = pool_elt_at_index (lbm->vips, ad->vip_index); */
-/*   return format(s, "vip idx:%d", ad->vip_index); */
-/* } */
-
 u8 *
 format_lb_trace (u8 * s, va_list * args)
 {
@@ -108,169 +91,162 @@ lb_node_fn (vlib_main_t * vm,
          u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
          u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
 {
-  /* ip_lookup_main_t *lm = (is_input_v4)?&ip4_main.lookup_main:&ip6_main.lookup_main; */
-  /* lb_main_t *lbm = &lb_main; */
-  /* vlib_node_runtime_t *error_node = node; */
-  /* u32 n_left_from, *from, next_index, *to_next, n_left_to_next; */
-  /* u32 cpu_index = os_get_cpu_number(); */
-  /* u32 lb_time = lb_hash_time_now(vm); */
-
-  /* lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); */
-  /* from = vlib_frame_vector_args (frame); */
-  /* n_left_from = frame->n_vectors; */
-  /* next_index = node->cached_next_index; */
-
-  /* while (n_left_from > 0) */
-  /* { */
-  /*   vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); */
-  /*   while (n_left_from > 0 && n_left_to_next > 0) */
-  /*   { */
-  /*     u32 pi0; */
-  /*     vlib_buffer_t *p0; */
-  /*     ip_adjacency_t *adj0; */
-  /*     lb_adj_data_t *ad0; */
-  /*     lb_vip_t *vip0; */
-  /*     lb_as_t *as0; */
-  /*     gre_header_t *gre0; */
-  /*     u16 len0; */
-  /*     u32 value0, available_index0, hash0; */
-  /*     u64 key0[5]; */
-  /*     lb_error_t error0 = LB_ERROR_NONE; */
-  /*     lb_next_t next0 = LB_NEXT_LOOKUP; */
-
-  /*     if (PREDICT_TRUE(n_left_from > 1)) */
-  /*     { */
-  /*       vlib_buffer_t *p2; */
-  /*       p2 = vlib_get_buffer(vm, from[1]); */
-  /*       vlib_prefetch_buffer_header(p2, STORE); */
-  /*       /\* IPv4 + 8 = 28. possibly plus -40 *\/ */
-  /*       CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); */
-  /*     } */
-
-  /*     pi0 = to_next[0] = from[0]; */
-  /*     from += 1; */
-  /*     n_left_from -= 1; */
-  /*     to_next += 1; */
-  /*     n_left_to_next -= 1; */
-
-  /*     p0 = vlib_get_buffer (vm, pi0); */
-  /*     adj0 = ip_get_adjacency (lm, vnet_buffer (p0)->ip.adj_index[VLIB_TX]); */
-  /*     ad0 = (lb_adj_data_t *) &adj0->opaque; */
-  /*     vip0 = pool_elt_at_index (lbm->vips, ad0->vip_index); */
-
-  /*     if (is_input_v4) { */
-  /*       ip4_header_t *ip40; */
-  /*       ip40 = vlib_buffer_get_current (p0); */
-  /*       len0 = clib_net_to_host_u16(ip40->length); */
-  /*       key0[0] = (u64) ip40->src_address.as_u32; */
-  /*       key0[1] = (u64) ip40->dst_address.as_u32; */
-  /*       key0[2] = 0; */
-  /*       key0[3] = 0; */
-  /*       key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | */
-  /*           ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); */
-
-  /*       hash0 = lb_hash_hash(key0); */
-  /*     } else { */
-  /*       ip6_header_t *ip60; */
-  /*       ip60 = vlib_buffer_get_current (p0); */
-  /*       len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); */
-  /*       key0[0] = ip60->src_address.as_u64[0]; */
-  /*       key0[1] = ip60->src_address.as_u64[1]; */
-  /*       key0[2] = ip60->dst_address.as_u64[0]; */
-  /*       key0[3] = ip60->dst_address.as_u64[1]; */
-  /*       key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | */
-  /*           ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); */
-
-  /*       hash0 = lb_hash_hash(key0); */
-  /*     } */
-
-  /*     //NOTE: This is an ugly trick to not include the VIP index in the hash calculation */
-  /*     //but actually use it in the key determination. */
-  /*     key0[4] |= ((vip0 - lbm->vips)); */
-
-  /*     lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); */
-  /*     if (PREDICT_TRUE(value0 != ~0)) { */
-  /*       //Found an existing entry */
-  /*       as0 = &lbm->ass[value0]; */
-  /*     } else if (PREDICT_TRUE(available_index0 != ~0)) { */
-  /*       //There is an available slot for a new flow */
-  /*       as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
-  /*       if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element */
-  /*         error0 = LB_ERROR_NO_SERVER; */
-  /*         next0 = LB_NEXT_DROP; */
-  /*       } else { */
-  /*         vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], */
-  /*                                       cpu_index, vip0 - lbm->vips, 1); */
-  /*       } */
-
-  /*       //TODO: There are race conditions with as0 and vip0 manipulation. */
-  /*       //Configuration may be changed, vectors resized, etc... */
-
-  /*       //Dereference previously used */
-  /*       vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); */
-  /*       vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); */
-
-  /*       //Add sticky entry */
-  /*       //Note that when there is no AS configured, an entry is configured anyway. */
-  /*       //But no configured AS is not something that should happen */
-  /*       lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); */
-  /*     } else { */
-  /*       //Could not store new entry in the table */
-  /*       as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
-  /*       vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], */
-  /*                                               cpu_index, vip0 - lbm->vips, 1); */
-  /*     } */
-
-  /*     //Now let's encap */
-  /*     if (is_encap_v4) { */
-  /*       ip4_header_t *ip40; */
-  /*       vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); */
-  /*       ip40 = vlib_buffer_get_current(p0); */
-  /*       gre0 = (gre_header_t *)(ip40 + 1); */
-  /*       ip40->src_address = lbm->ip4_src_address; */
-  /*       ip40->dst_address = as0->address.ip4; */
-  /*       ip40->ip_version_and_header_length = 0x45; */
-  /*       ip40->ttl = 128; */
-  /*       ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); */
-  /*       ip40->protocol = IP_PROTOCOL_GRE; */
-  /*       ip40->checksum = ip4_header_checksum (ip40); */
-  /*     } else { */
-  /*       ip6_header_t *ip60; */
-  /*       vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); */
-  /*       ip60 = vlib_buffer_get_current(p0); */
-  /*       gre0 = (gre_header_t *)(ip60 + 1); */
-  /*       ip60->dst_address = as0->address.ip6; */
-  /*       ip60->src_address = lbm->ip6_src_address; */
-  /*       ip60->hop_limit = 128; */
-  /*       ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); */
-  /*       ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); */
-  /*       ip60->protocol = IP_PROTOCOL_GRE; */
-  /*     } */
-
-  /*     gre0->flags_and_version = 0; */
-  /*     gre0->protocol = (is_input_v4)? */
-  /*         clib_host_to_net_u16(0x0800): */
-  /*         clib_host_to_net_u16(0x86DD); */
-
-  /*     vnet_buffer(p0)->ip.adj_index[VLIB_TX] = as0->adj_index; */
-  /*     next0 = (as0->adj_index != ~0)?LB_NEXT_REWRITE:next0; */
-
-  /*     if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) */
-  /*     { */
-  /*       lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); */
-  /*       tr->as_index = as0 - lbm->ass; */
-  /*       tr->vip_index = ad0->vip_index; */
-  /*     } */
-
-  /*     p0->error = error_node->errors[error0]; */
-  /*     vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, */
-  /*                                      n_left_to_next, pi0, next0); */
-  /*   } */
-  /*   vlib_put_next_frame (vm, node, next_index, n_left_to_next); */
-  /* } */
-
-  /* return frame->n_vectors; */
-  return 0;
+  lb_main_t *lbm = &lb_main;
+  vlib_node_runtime_t *error_node = node;
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  u32 cpu_index = os_get_cpu_number();
+  u32 lb_time = lb_hash_time_now(vm);
+
+  lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+  {
+    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+    while (n_left_from > 0 && n_left_to_next > 0)
+    {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      lb_vip_t *vip0;
+      lb_as_t *as0;
+      gre_header_t *gre0;
+      u16 len0;
+      u32 value0, available_index0, hash0;
+      u64 key0[5];
+      lb_error_t error0 = LB_ERROR_NONE;
+
+      if (PREDICT_TRUE(n_left_from > 1))
+      {
+        vlib_buffer_t *p2;
+        p2 = vlib_get_buffer(vm, from[1]);
+        vlib_prefetch_buffer_header(p2, STORE);
+        /* IPv4 + 8 = 28. possibly plus -40 */
+        CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE);
+      }
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next += 1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer (vm, pi0);
+      vip0 = pool_elt_at_index (lbm->vips,
+                               vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+      if (is_input_v4) {
+        ip4_header_t *ip40;
+        ip40 = vlib_buffer_get_current (p0);
+        len0 = clib_net_to_host_u16(ip40->length);
+        key0[0] = (u64) ip40->src_address.as_u32;
+        key0[1] = (u64) ip40->dst_address.as_u32;
+        key0[2] = 0;
+        key0[3] = 0;
+        key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) |
+            ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16);
+
+        hash0 = lb_hash_hash(key0);
+      } else {
+        ip6_header_t *ip60;
+        ip60 = vlib_buffer_get_current (p0);
+        len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+        key0[0] = ip60->src_address.as_u64[0];
+        key0[1] = ip60->src_address.as_u64[1];
+        key0[2] = ip60->dst_address.as_u64[0];
+        key0[3] = ip60->dst_address.as_u64[1];
+        key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) |
+            ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16);
+
+        hash0 = lb_hash_hash(key0);
+      }
+
+      //NOTE: This is an ugly trick to not include the VIP index in the hash calculation
+      //but actually use it in the key determination.
+      key0[4] |= ((vip0 - lbm->vips));
+
+      lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0);
+      if (PREDICT_TRUE(value0 != ~0)) {
+        //Found an existing entry
+        as0 = &lbm->ass[value0];
+      } else if (PREDICT_TRUE(available_index0 != ~0)) {
+        //There is an available slot for a new flow
+        as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+        if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element
+          error0 = LB_ERROR_NO_SERVER;
+        } else {
+          vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION],
+                                        cpu_index, vip0 - lbm->vips, 1);
+        }
+
+        //TODO: There are race conditions with as0 and vip0 manipulation.
+        //Configuration may be changed, vectors resized, etc...
+
+        //Dereference previously used
+        vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1);
+        vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1);
+
+        //Add sticky entry
+        //Note that when there is no AS configured, an entry is configured anyway.
+        //But no configured AS is not something that should happen
+        lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time);
+      } else {
+        //Could not store new entry in the table
+        as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+        vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET],
+                                                cpu_index, vip0 - lbm->vips, 1);
+      }
+
+      //Now let's encap
+      if (is_encap_v4) {
+        ip4_header_t *ip40;
+        vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+        ip40 = vlib_buffer_get_current(p0);
+        gre0 = (gre_header_t *)(ip40 + 1);
+        ip40->src_address = lbm->ip4_src_address;
+        ip40->dst_address = as0->address.ip4;
+        ip40->ip_version_and_header_length = 0x45;
+        ip40->ttl = 128;
+        ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+        ip40->protocol = IP_PROTOCOL_GRE;
+        ip40->checksum = ip4_header_checksum (ip40);
+      } else {
+        ip6_header_t *ip60;
+        vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+        ip60 = vlib_buffer_get_current(p0);
+        gre0 = (gre_header_t *)(ip60 + 1);
+        ip60->dst_address = as0->address.ip6;
+        ip60->src_address = lbm->ip6_src_address;
+        ip60->hop_limit = 128;
+        ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+        ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+        ip60->protocol = IP_PROTOCOL_GRE;
+      }
+
+      gre0->flags_and_version = 0;
+      gre0->protocol = (is_input_v4)?
+          clib_host_to_net_u16(0x0800):
+          clib_host_to_net_u16(0x86DD);
+
+      vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index;
+
+      if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+      {
+        lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+        tr->as_index = as0 - lbm->ass;
+        tr->vip_index = vip0 - lbm->vips;
+      }
+
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                       n_left_to_next, pi0,
+                                      as0->dpo.dpoi_next_node);
+    }
+    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+  }
+
+  return frame->n_vectors;
 }
 
 static uword
@@ -314,18 +290,10 @@ VLIB_REGISTER_NODE (lb6_gre6_node) =
   .n_next_nodes = LB_N_NEXT,
   .next_nodes =
   {
-      [LB_NEXT_LOOKUP] = "ip6-lookup",
-      [LB_NEXT_REWRITE] = "ip6-rewrite",
       [LB_NEXT_DROP] = "error-drop"
   },
 };
 
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre6) = { */
-/*   .node_name = "lb6-gre6", */
-/*   .fn = lb_format_adjacency, */
-/*   .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE6] */
-/* }; */
-
 VLIB_REGISTER_NODE (lb6_gre4_node) =
 {
   .function = lb6_gre4_node_fn,
@@ -339,18 +307,10 @@ VLIB_REGISTER_NODE (lb6_gre4_node) =
   .n_next_nodes = LB_N_NEXT,
   .next_nodes =
   {
-      [LB_NEXT_LOOKUP] = "ip4-lookup",
-      [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
       [LB_NEXT_DROP] = "error-drop"
   },
 };
 
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre4) = { */
-/*   .node_name = "lb6-gre4", */
-/*   .fn = lb_format_adjacency, */
-/*   .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE4] */
-/* }; */
-
 VLIB_REGISTER_NODE (lb4_gre6_node) =
 {
   .function = lb4_gre6_node_fn,
@@ -364,18 +324,10 @@ VLIB_REGISTER_NODE (lb4_gre6_node) =
   .n_next_nodes = LB_N_NEXT,
   .next_nodes =
   {
-      [LB_NEXT_LOOKUP] = "ip6-lookup",
-      [LB_NEXT_REWRITE] = "ip6-rewrite",
       [LB_NEXT_DROP] = "error-drop"
   },
 };
 
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre6) = { */
-/*   .node_name = "lb4-gre6", */
-/*   .fn = lb_format_adjacency, */
-/*   .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE6] */
-/* }; */
-
 VLIB_REGISTER_NODE (lb4_gre4_node) =
 {
   .function = lb4_gre4_node_fn,
@@ -389,14 +341,7 @@ VLIB_REGISTER_NODE (lb4_gre4_node) =
   .n_next_nodes = LB_N_NEXT,
   .next_nodes =
   {
-      [LB_NEXT_LOOKUP] = "ip4-lookup",
-      [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
       [LB_NEXT_DROP] = "error-drop"
   },
 };
 
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre4) = { */
-/*   .node_name = "lb4-gre4", */
-/*   .fn = lb_format_adjacency, */
-/*   .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE4] */
-/* }; */