X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlibmemory%2Fmemory_vlib.c;h=805438152ce3741df4c4470ac321917855010fb9;hb=90a6398;hp=1d40bcb791fda70ae72d74e9152df4be8f7a2b9c;hpb=7cd468a3d7dee7d6c92f69a0bb7061ae208ec727;p=vpp.git diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 1d40bcb791f..805438152ce 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -36,6 +38,14 @@ #include #include +/** + * @file + * @brief Binary API messaging via shared memory + * Low-level, primary provisioning interface + */ +/*? %%clicmd:group_label Binary API CLI %% ?*/ +/*? %%syscfg:group_label Binary API configuration %% ?*/ + #define TRACE_VLIB_MEMORY_QUEUE 0 #include /* enumerate all vlib messages */ @@ -70,22 +80,23 @@ vl_api_memclnt_delete_t_print (vl_api_memclnt_delete_t * a, void *handle) return handle; } +static inline void * +vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a, + void *handle) +{ + vl_print (handle, "vl_api_trace_plugin_msg_ids: %s first %u last %u\n", + a->plugin_name, + clib_host_to_net_u16 (a->first_msg_id), + clib_host_to_net_u16 (a->last_msg_id)); + return handle; +} + /* instantiate all the endian swap functions we know about */ #define vl_endianfun #include #undef vl_endianfun -void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) - __attribute__ ((weak)); - -void -vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) -{ - static int count; - - if (count++ < 5) - clib_warning ("need to link against -lvlibsocket, msg not sent!"); -} +extern void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) @@ -96,7 +107,7 @@ vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) } else { - vl_msg_api_send_shmem (rp->vl_input_queue, elem); + vl_msg_api_send_shmem (rp->vl_input_queue, (u8 *) & elem); } } @@ -112,18 +123,57 @@ vl_api_serialize_message_table (api_main_t * am, u8 * vector) /* serialize the count */ serialize_integer (sm, nmsg, sizeof (u32)); - hash_foreach_pair (hp, am->msg_index_by_name_and_crc, ( - { - serialize_likely_small_unsigned_integer - (sm, hp->value[0]); - serialize_cstring - (sm, - (char *) hp->key); - })); + /* *INDENT-OFF* */ + hash_foreach_pair (hp, am->msg_index_by_name_and_crc, + ({ + serialize_likely_small_unsigned_integer (sm, hp->value[0]); + serialize_cstring (sm, (char *) hp->key); + })); + /* *INDENT-ON* */ return serialize_close_vector (sm); } +/* + * vl_api_memclnt_create_internal + */ + +u32 +vl_api_memclnt_create_internal (char *name, unix_shared_memory_queue_t * q) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + svm_region_t *svm; + void *oldheap; + api_main_t *am = &api_main; + + ASSERT (vlib_get_thread_index () == 0); + pool_get (am->vl_clients, regpp); + + svm = am->vlib_rp; + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); + + regp = *regpp; + memset (regp, 0, sizeof (*regp)); + regp->registration_type = REGISTRATION_TYPE_SHMEM; + regp->vl_api_registration_pool_index = regpp - am->vl_clients; + regp->vlib_rp = svm; + regp->shmem_hdr = am->shmem_hdr; + + regp->vl_input_queue = q; + regp->name = format (0, "%s%c", name, 0); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + return vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); +} + + /* * vl_api_memclnt_create_t_handler */ @@ -138,7 +188,6 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) int rv = 0; void *oldheap; api_main_t *am = &api_main; - u8 *serialized_message_table = 0; /* * This is tortured. Maintain a vlib-address-space private @@ -170,9 +219,6 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) svm = am->vlib_rp; - if (am->serialized_message_table_in_shmem == 0) - serialized_message_table = vl_api_serialize_message_table (am, 0); - pthread_mutex_lock (&svm->mutex); oldheap = svm_push_data_heap (svm); *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); @@ -181,21 +227,22 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) memset (regp, 0, sizeof (*regp)); regp->registration_type = REGISTRATION_TYPE_SHMEM; regp->vl_api_registration_pool_index = regpp - am->vl_clients; + regp->vlib_rp = svm; + regp->shmem_hdr = am->shmem_hdr; q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword) mp->input_queue; regp->name = format (0, "%s", mp->name); vec_add1 (regp->name, 0); - if (serialized_message_table) + + if (am->serialized_message_table_in_shmem == 0) am->serialized_message_table_in_shmem = - vec_dup (serialized_message_table); + vl_api_serialize_message_table (am, 0); pthread_mutex_unlock (&svm->mutex); svm_pop_heap (oldheap); - vec_free (serialized_message_table); - rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY); rp->handle = (uword) regp; @@ -204,17 +251,26 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) am->shmem_hdr->application_restarts); rp->context = mp->context; rp->response = ntohl (rv); - rp->message_table = (u64) am->serialized_message_table_in_shmem; + rp->message_table = + pointer_to_uword (am->serialized_message_table_in_shmem); vl_msg_api_send_shmem (q, (u8 *) & rp); } -/* Application callback to clean up leftover registrations from this client */ -int vl_api_memclnt_delete_callback (u32 client_index) __attribute__ ((weak)); - -int -vl_api_memclnt_delete_callback (u32 client_index) +static int +call_reaper_functions (u32 client_index) { + clib_error_t *error = 0; + _vl_msg_api_function_list_elt_t *i; + + i = api_main.reaper_function_registrations; + while (i) + { + error = i->f (client_index); + if (error) + clib_error_report (error); + i = i->next_init_function; + } return 0; } @@ -234,7 +290,7 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) handle = mp->index; - if (vl_api_memclnt_delete_callback (handle)) + if (call_reaper_functions (handle)) return; epoch = vl_msg_api_handle_get_epoch (handle); @@ -253,11 +309,15 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) if (!pool_is_free (am->vl_clients, regpp)) { + int i; regp = *regpp; svm = am->vlib_rp; + int private_registration = 0; - /* $$$ check the input queue for e.g. punted sf's */ - + /* + * Note: the API message handling path will set am->vlib_rp + * as appropriate for pairwise / private memory segments + */ rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY); rp->handle = mp->handle; @@ -273,18 +333,56 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) return; } + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + /* Is this a pairwise / private API segment? */ + if (am->vlib_private_rps[i] == svm) + { + /* Note: account for the memfd header page */ + u64 virtual_base = svm->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = svm->virtual_size + MMAP_PAGESIZE; + + /* + * Kill the registration pool element before we make + * the index vanish forever + */ + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + + vec_delete (am->vlib_private_rps, 1, i); + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + private_registration = 1; + break; + } + } + /* No dangling references, please */ *regpp = 0; - pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index); - - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - /* Poison the old registration */ - memset (regp, 0xF1, sizeof (*regp)); - clib_mem_free (regp); - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); + if (private_registration == 0) + { + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + /* Poison the old registration */ + memset (regp, 0xF1, sizeof (*regp)); + clib_mem_free (regp); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + /* + * These messages must be freed manually, since they're set up + * as "bounce" messages. In the private_registration == 1 case, + * we kill the shared-memory segment which contains the message + * with munmap. + */ + vl_msg_api_free (mp); + } } else { @@ -332,21 +430,104 @@ out: vl_msg_api_send_shmem (q, (u8 *) & rmp); } -#define foreach_vlib_api_msg \ -_(MEMCLNT_CREATE, memclnt_create) \ -_(MEMCLNT_DELETE, memclnt_delete) \ -_(GET_FIRST_MSG_ID, get_first_msg_id) +/** + * client answered a ping, stave off the grim reaper... + */ + +void + vl_api_memclnt_keepalive_reply_t_handler + (vl_api_memclnt_keepalive_reply_t * mp) +{ + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + + regp = vl_api_client_index_to_registration (mp->context); + if (regp) + { + regp->last_heard = vlib_time_now (vm); + regp->unanswered_pings = 0; + } + else + clib_warning ("BUG: anonymous memclnt_keepalive_reply"); +} + +/** + * We can send ourselves these messages if someone uses the + * builtin binary api test tool... + */ +static void +vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp) +{ + vl_api_memclnt_keepalive_reply_t *rmp; + api_main_t *am; + vl_shmem_hdr_t *shmem_hdr; + + am = &api_main; + shmem_hdr = am->shmem_hdr; + + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp); +} + +void +vl_api_api_versions_t_handler (vl_api_api_versions_t * mp) +{ + api_main_t *am = &api_main; + vl_api_api_versions_reply_t *rmp; + unix_shared_memory_queue_t *q; + u32 nmsg = vec_len (am->api_version_list); + int msg_size = sizeof (*rmp) + sizeof (rmp->api_versions[0]) * nmsg; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + rmp = vl_msg_api_alloc (msg_size); + memset (rmp, 0, msg_size); + rmp->_vl_msg_id = ntohs (VL_API_API_VERSIONS_REPLY); + + /* fill in the message */ + rmp->context = mp->context; + rmp->count = htonl (nmsg); + + for (i = 0; i < nmsg; ++i) + { + api_version_t *vl = &am->api_version_list[i]; + rmp->api_versions[i].major = htonl (vl->major); + rmp->api_versions[i].minor = htonl (vl->minor); + rmp->api_versions[i].patch = htonl (vl->patch); + strncpy ((char *) rmp->api_versions[i].name, vl->name, 64 - 1); + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + +} + +#define foreach_vlib_api_msg \ +_(MEMCLNT_CREATE, memclnt_create) \ +_(MEMCLNT_DELETE, memclnt_delete) \ +_(GET_FIRST_MSG_ID, get_first_msg_id) \ +_(MEMCLNT_KEEPALIVE, memclnt_keepalive) \ +_(MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply) \ +_(API_VERSIONS, api_versions) /* * vl_api_init */ static int -memory_api_init (char *region_name) +memory_api_init (const char *region_name) { int rv; + api_main_t *am = &api_main; vl_msg_api_msg_config_t cfg; vl_msg_api_msg_config_t *c = &cfg; + memset (c, 0, sizeof (*c)); + if ((rv = vl_map_shmem (region_name, 1 /* is_vlib */ )) < 0) return rv; @@ -360,11 +541,19 @@ memory_api_init (char *region_name) c->size = sizeof(vl_api_##n##_t); \ c->traced = 1; /* trace, so these msgs print */ \ c->replay = 0; /* don't replay client create/delete msgs */ \ + c->message_bounce = 0; /* don't bounce this message */ \ vl_msg_api_config(c);} while (0); foreach_vlib_api_msg; #undef _ + /* + * special-case freeing of memclnt_delete messages, so we can + * simply munmap pairwise / private API segments... + */ + am->message_bounce[VL_API_MEMCLNT_DELETE] = 1; + am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE_REPLY] = 1; + return 0; } @@ -386,6 +575,228 @@ static u64 vector_rate_histogram[SLEEP_N_BUCKETS]; static void memclnt_queue_callback (vlib_main_t * vm); +/* + * Callback to send ourselves a plugin numbering-space trace msg + */ +static void +send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id) +{ + vl_api_trace_plugin_msg_ids_t *mp; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q; + + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_TRACE_PLUGIN_MSG_IDS); + strncpy ((char *) mp->plugin_name, (char *) name, + sizeof (mp->plugin_name) - 1); + mp->first_msg_id = clib_host_to_net_u16 (first_msg_id); + mp->last_msg_id = clib_host_to_net_u16 (last_msg_id); + + q = shmem_hdr->vl_input_queue; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +send_memclnt_keepalive (vl_api_registration_t * regp, f64 now) +{ + vl_api_memclnt_keepalive_t *mp; + unix_shared_memory_queue_t *q; + api_main_t *am = &api_main; + svm_region_t *save_vlib_rp = am->vlib_rp; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + + q = regp->vl_input_queue; + + /* + * If the queue head is moving, assume that the client is processing + * messages and skip the ping. This heuristic may fail if the queue + * is in the same position as last time, net of wrapping; in which + * case, the client will receive a keepalive. + */ + if (regp->last_queue_head != q->head) + { + regp->last_heard = now; + regp->unanswered_pings = 0; + regp->last_queue_head = q->head; + return; + } + + /* + * push/pop shared memory segment, so this routine + * will work with "normal" as well as "private segment" + * memory clients.. + */ + + am->vlib_rp = regp->vlib_rp; + am->shmem_hdr = regp->shmem_hdr; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MEMCLNT_KEEPALIVE); + mp->context = mp->client_index = + vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); + + regp->unanswered_pings++; + + /* Failure-to-send due to a stuffed queue is absolutely expected */ + if (unix_shared_memory_queue_add (q, (u8 *) & mp, 1 /* nowait */ )) + vl_msg_api_free (mp); + + am->vlib_rp = save_vlib_rp; + am->shmem_hdr = save_shmem_hdr; +} + +static void +dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now) +{ + + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + static u32 *dead_indices; + static u32 *confused_indices; + + vec_reset_length (dead_indices); + vec_reset_length (confused_indices); + + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp) + { + /* If we haven't heard from this client recently... */ + if (regp->last_heard < (now - 10.0)) + { + if (regp->unanswered_pings == 2) + { + unix_shared_memory_queue_t *q; + q = regp->vl_input_queue; + if (kill (q->consumer_pid, 0) >=0) + { + clib_warning ("REAPER: lazy binary API client '%s'", + regp->name); + regp->unanswered_pings = 0; + regp->last_heard = now; + } + else + { + clib_warning ("REAPER: binary API client '%s' died", + regp->name); + vec_add1(dead_indices, regpp - am->vl_clients); + } + } + else + send_memclnt_keepalive (regp, now); + } + else + regp->unanswered_pings = 0; + } + else + { + clib_warning ("NULL client registration index %d", + regpp - am->vl_clients); + vec_add1 (confused_indices, regpp - am->vl_clients); + } + })); + /* *INDENT-ON* */ + /* This should "never happen," but if it does, fix it... */ + if (PREDICT_FALSE (vec_len (confused_indices) > 0)) + { + int i; + for (i = 0; i < vec_len (confused_indices); i++) + { + pool_put_index (am->vl_clients, confused_indices[i]); + } + } + + if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + { + int i; + svm_region_t *svm; + void *oldheap; + + /* Allow the application to clean up its registrations */ + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + u32 handle; + + handle = vl_msg_api_handle_from_index_and_epoch + (dead_indices[i], shm->application_restarts); + (void) call_reaper_functions (handle); + } + } + + svm = am->vlib_rp; + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + /* Is this a pairwise SVM segment? */ + if ((*regpp)->vlib_rp != svm) + { + int i; + svm_region_t *dead_rp = (*regpp)->vlib_rp; + /* Note: account for the memfd header page */ + u64 virtual_base = dead_rp->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = dead_rp->virtual_size + MMAP_PAGESIZE; + + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + if (am->vlib_private_rps[i] == dead_rp) + { + vec_delete (am->vlib_private_rps, 1, i); + goto found; + } + clib_warning ("private rp %llx AWOL", dead_rp); + + found: + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + } + else + { + /* Poison the old registration */ + memset (*regpp, 0xF3, sizeof (**regpp)); + clib_mem_free (*regpp); + } + /* no dangling references, please */ + *regpp = 0; + } + else + { + svm_pop_heap (oldheap); + clib_warning ("Duplicate free, client index %d", + regpp - am->vl_clients); + oldheap = svm_push_data_heap (svm); + } + } + + svm_client_scan_this_region_nolock (am->vlib_rp); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + for (i = 0; i < vec_len (dead_indices); i++) + pool_put_index (am->vl_clients, dead_indices[i]); + } +} + + static uword memclnt_process (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) @@ -399,37 +810,112 @@ memclnt_process (vlib_main_t * vm, f64 dead_client_scan_time; f64 sleep_time, start_time; f64 vector_rate; + clib_error_t *socksvr_api_init (vlib_main_t * vm); + clib_error_t *error; + int i; + vl_socket_args_for_process_t *a; + uword event_type; + uword *event_data = 0; + int private_segment_rotor = 0; + svm_region_t *vlib_rp; + f64 now; vlib_set_queue_signal_callback (vm, memclnt_queue_callback); if ((rv = memory_api_init (am->region_name)) < 0) { - clib_warning ("memory_api_init returned %d, wait for godot...", rv); - vlib_process_suspend (vm, 1e70); + clib_warning ("memory_api_init returned %d, quitting...", rv); + return 0; + } + + if ((error = socksvr_api_init (vm))) + { + clib_error_report (error); + clib_warning ("socksvr_api_init failed, quitting..."); + return 0; } shm = am->shmem_hdr; ASSERT (shm); q = shm->vl_input_queue; ASSERT (q); + /* Make a note so we can always find the primary region easily */ + am->vlib_primary_rp = am->vlib_rp; e = vlib_call_init_exit_functions (vm, vm->api_init_function_registrations, 1 /* call_once */ ); if (e) clib_error_report (e); - sleep_time = 20.0; - dead_client_scan_time = vlib_time_now (vm) + 20.0; + sleep_time = 10.0; + dead_client_scan_time = vlib_time_now (vm) + 10.0; - /* $$$ pay attention to frame size, control CPU usage */ - while (1) + /* + * Send plugin message range messages for each plugin we loaded + */ + for (i = 0; i < vec_len (am->msg_ranges); i++) { - uword event_type __attribute__ ((unused)); - i8 *headp; - int need_broadcast; + vl_api_msg_range_t *rp = am->msg_ranges + i; + send_one_plugin_msg_ids_msg (rp->name, rp->first_msg_id, + rp->last_msg_id); + } + + /* + * Save the api message table snapshot, if configured + */ + if (am->save_msg_table_filename) + { + int fd, rv; + u8 *chroot_file; + u8 *serialized_message_table; /* - * There's a reason for checking the queue before + * Snapshoot the api message table. + */ + if (strstr ((char *) am->save_msg_table_filename, "..") + || index ((char *) am->save_msg_table_filename, '/')) + { + clib_warning ("illegal save-message-table filename '%s'", + am->save_msg_table_filename); + goto skip_save; + } + + chroot_file = format (0, "/tmp/%s%c", am->save_msg_table_filename, 0); + + fd = creat ((char *) chroot_file, 0644); + + if (fd < 0) + { + clib_unix_warning ("creat"); + goto skip_save; + } + + serialized_message_table = vl_api_serialize_message_table (am, 0); + + rv = write (fd, serialized_message_table, + vec_len (serialized_message_table)); + + if (rv != vec_len (serialized_message_table)) + clib_unix_warning ("write"); + + rv = close (fd); + if (rv < 0) + clib_unix_warning ("close"); + + vec_free (chroot_file); + vec_free (serialized_message_table); + } + +skip_save: + + /* $$$ pay attention to frame size, control CPU usage */ + while (1) + { + i8 *headp; + int need_broadcast; + + /* + * There's a reason for checking the queue before * sleeping. If the vlib application crashes, it's entirely * possible for a client to enqueue a connect request * during the process restart interval. @@ -509,104 +995,88 @@ memclnt_process (vlib_main_t * vm, } } - event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time); - vm->queue_signal_pending = 0; - vlib_process_get_events (vm, 0 /* event_data */ ); - - if (vlib_time_now (vm) > dead_client_scan_time) + /* + * see if we have any private api shared-memory segments + * If so, push required context variables, and process + * a message. + */ + if (PREDICT_FALSE (vec_len (am->vlib_private_rps))) { - vl_api_registration_t **regpp; - vl_api_registration_t *regp; - unix_shared_memory_queue_t *q; - static u32 *dead_indices; - static u32 *confused_indices; + unix_shared_memory_queue_t *save_vlib_input_queue = q; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + svm_region_t *save_vlib_rp = am->vlib_rp; - vec_reset_length (dead_indices); - vec_reset_length (confused_indices); + vlib_rp = am->vlib_rp = am->vlib_private_rps[private_segment_rotor]; - /* *INDENT-OFF* */ - pool_foreach (regpp, am->vl_clients, - ({ - regp = *regpp; - if (regp) - { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - vec_add1(dead_indices, regpp - am->vl_clients); - } - } - else - { - clib_warning ("NULL client registration index %d", - regpp - am->vl_clients); - vec_add1 (confused_indices, regpp - am->vl_clients); - } - })); - /* *INDENT-ON* */ - /* This should "never happen," but if it does, fix it... */ - if (PREDICT_FALSE (vec_len (confused_indices) > 0)) - { - int i; - for (i = 0; i < vec_len (confused_indices); i++) - { - pool_put_index (am->vl_clients, confused_indices[i]); - } - } + am->shmem_hdr = (void *) vlib_rp->user_ctx; + q = am->shmem_hdr->vl_input_queue; - if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + pthread_mutex_lock (&q->mutex); + if (q->cursize > 0) { - int i; - svm_region_t *svm; - void *oldheap; + headp = (i8 *) (q->data + sizeof (uword) * q->head); + clib_memcpy (&mp, headp, sizeof (uword)); - /* Allow the application to clean up its registrations */ - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - u32 handle; - - handle = vl_msg_api_handle_from_index_and_epoch - (dead_indices[i], shm->application_restarts); - (void) vl_api_memclnt_delete_callback (handle); - } - } + q->head++; + need_broadcast = (q->cursize == q->maxsize / 2); + q->cursize--; - svm = am->vlib_rp; - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); + if (PREDICT_FALSE (q->head == q->maxsize)) + q->head = 0; + pthread_mutex_unlock (&q->mutex); - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - /* Poison the old registration */ - memset (*regpp, 0xF3, sizeof (**regpp)); - clib_mem_free (*regpp); - /* no dangling references, please */ - *regpp = 0; - } - else - { - svm_pop_heap (oldheap); - clib_warning ("Duplicate free, client index %d", - regpp - am->vl_clients); - oldheap = svm_push_data_heap (svm); - } - } + if (need_broadcast) + (void) pthread_cond_broadcast (&q->condvar); + + vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node); + } + else + pthread_mutex_unlock (&q->mutex); - svm_client_scan_this_region_nolock (am->vlib_rp); + q = save_vlib_input_queue; + am->shmem_hdr = save_shmem_hdr; + am->vlib_rp = save_vlib_rp; - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - for (i = 0; i < vec_len (dead_indices); i++) - pool_put_index (am->vl_clients, dead_indices[i]); + private_segment_rotor++; + if (private_segment_rotor >= vec_len (am->vlib_private_rps)) + private_segment_rotor = 0; + } + + vlib_process_wait_for_event_or_clock (vm, sleep_time); + vec_reset_length (event_data); + event_type = vlib_process_get_events (vm, &event_data); + now = vlib_time_now (vm); + + switch (event_type) + { + case QUEUE_SIGNAL_EVENT: + vm->queue_signal_pending = 0; + break; + + case SOCKET_READ_EVENT: + for (i = 0; i < vec_len (event_data); i++) + { + a = pool_elt_at_index (socket_main.process_args, event_data[i]); + vl_api_socket_process_msg (a->clib_file, a->regp, + (i8 *) a->data); + vec_free (a->data); + pool_put (socket_main.process_args, a); } + break; + + /* Timeout... */ + case -1: + break; + + default: + clib_warning ("unknown event type %d", event_type); + break; + } - dead_client_scan_time = vlib_time_now (vm) + 20.0; + if (now > dead_client_scan_time) + { + dead_client_scan (am, shm, now); + dead_client_scan_time = vlib_time_now (vm) + 10.0; } if (TRACE_VLIB_MEMORY_QUEUE) @@ -628,6 +1098,16 @@ memclnt_process (vlib_main_t * vm, return 0; } +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memclnt_node) = +{ + .function = memclnt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "api-rx-from-ring", + .state = VLIB_NODE_STATE_DISABLED, +}; +/* *INDENT-ON* */ + static clib_error_t * vl_api_show_histogram_command (vlib_main_t * vm, @@ -664,11 +1144,15 @@ vl_api_show_histogram_command (vlib_main_t * vm, return 0; } +/*? + * Display the binary api sleep-time histogram +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) = { - .path = "show api histogram", - .short_help = "show api histogram", - .function = vl_api_show_histogram_command, +VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) = +{ + .path = "show api histogram", + .short_help = "show api histogram", + .function = vl_api_show_histogram_command, }; /* *INDENT-ON* */ @@ -684,32 +1168,29 @@ vl_api_clear_histogram_command (vlib_main_t * vm, return 0; } +/*? + * Clear the binary api sleep-time histogram +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = { - .path = "clear api histogram", - .short_help = "clear api histogram", - .function = vl_api_clear_histogram_command, +VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = +{ + .path = "clear api histogram", + .short_help = "clear api histogram", + .function = vl_api_clear_histogram_command, }; /* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (memclnt_node,static) = { - .function = memclnt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "api-rx-from-ring", - .state = VLIB_NODE_STATE_DISABLED, -}; -/* *INDENT-ON* */ +volatile int **vl_api_queue_cursizes; static void memclnt_queue_callback (vlib_main_t * vm) { - static volatile int *cursizep; + int i; + api_main_t *am = &api_main; - if (PREDICT_FALSE (cursizep == 0)) + if (PREDICT_FALSE (vec_len (vl_api_queue_cursizes) != + 1 + vec_len (am->vlib_private_rps))) { - api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q; @@ -719,15 +1200,30 @@ memclnt_queue_callback (vlib_main_t * vm) q = shmem_hdr->vl_input_queue; if (q == 0) return; - cursizep = &q->cursize; + + vec_add1 (vl_api_queue_cursizes, &q->cursize); + + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + + shmem_hdr = (void *) vlib_rp->user_ctx; + q = shmem_hdr->vl_input_queue; + vec_add1 (vl_api_queue_cursizes, &q->cursize); + } } - if (*cursizep >= 1) + for (i = 0; i < vec_len (vl_api_queue_cursizes); i++) { - vm->queue_signal_pending = 1; - vm->api_queue_nonempty = 1; - vlib_process_signal_event (vm, memclnt_node.index, - /* event_type */ 0, /* event_data */ 0); + if (*vl_api_queue_cursizes[i]) + { + vm->queue_signal_pending = 1; + vm->api_queue_nonempty = 1; + vlib_process_signal_event (vm, memclnt_node.index, + /* event_type */ QUEUE_SIGNAL_EVENT, + /* event_data */ 0); + break; + } } } @@ -808,51 +1304,104 @@ setup_memclnt_exit (vlib_main_t * vm) VLIB_INIT_FUNCTION (setup_memclnt_exit); +u8 * +format_api_message_rings (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_shmem_hdr_t *shmem_hdr = va_arg (*args, vl_shmem_hdr_t *); + int main_segment = va_arg (*args, int); + ring_alloc_t *ap; + int i; + + if (shmem_hdr == 0) + return format (s, "%8s %8s %8s %8s %8s\n", + "Owner", "Size", "Nitems", "Hits", "Misses"); + + ap = shmem_hdr->vl_rings; + + for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "vlib", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + ap = shmem_hdr->client_rings; + + for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "clnt", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + if (main_segment) + { + s = format (s, "%d ring miss fallback allocations\n", am->ring_misses); + s = format + (s, + "%d application restarts, %d reclaimed msgs, %d garbage collects\n", + shmem_hdr->application_restarts, shmem_hdr->restart_reclaims, + shmem_hdr->garbage_collects); + } + return s; +} + static clib_error_t * vl_api_ring_command (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cli_cmd) { int i; - ring_alloc_t *ap; vl_shmem_hdr_t *shmem_hdr; api_main_t *am = &api_main; - shmem_hdr = am->shmem_hdr; + /* First, dump the primary region rings.. */ - if (shmem_hdr == 0) + if (am->vlib_primary_rp == 0 || am->vlib_primary_rp->user_ctx == 0) { vlib_cli_output (vm, "Shared memory segment not initialized...\n"); return 0; } - vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n", - "Owner", "Size", "Nitems", "Hits", "Misses"); + shmem_hdr = (void *) am->vlib_primary_rp->user_ctx; - ap = shmem_hdr->vl_rings; + vlib_cli_output (vm, "Main API segment rings:"); - for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) - { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "vlib", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; - } + vlib_cli_output (vm, "%U", format_api_message_rings, am, + 0 /* print header */ , 0 /* notused */ ); - ap = shmem_hdr->client_rings; + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 1 /* main segment */ ); - for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + for (i = 0; i < vec_len (am->vlib_private_rps); i++) { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "clnt", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + shmem_hdr = (void *) vlib_rp->user_ctx; + vl_api_registration_t **regpp; + vl_api_registration_t *regp = 0; + + /* For horizontal scaling, add a hash table... */ + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp && regp->vlib_rp == vlib_rp) + { + vlib_cli_output (vm, "%s segment rings:", regp->name); + goto found; + } + })); + vlib_cli_output (vm, "regp %llx not found?", regp); + continue; + /* *INDENT-ON* */ + found: + vlib_cli_output (vm, "%U", format_api_message_rings, am, + 0 /* print header */ , 0 /* notused */ ); + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 0 /* main segment */ ); } - vlib_cli_output (vm, "%d ring miss fallback allocations\n", - am->ring_misses); - - vlib_cli_output (vm, "%d application restarts, %d reclaimed msgs\n", - shmem_hdr->application_restarts, - shmem_hdr->restart_reclaims); return 0; } @@ -877,7 +1426,7 @@ vl_api_client_command (vlib_main_t * vm, if (!pool_elts (am->vl_clients)) goto socket_clients; vlib_cli_output (vm, "Shared memory clients"); - vlib_cli_output (vm, "%16s %8s %14s %18s %s", + vlib_cli_output (vm, "%20s %8s %14s %18s %s", "Name", "PID", "Queue Length", "Queue VA", "Health"); /* *INDENT-OFF* */ @@ -887,16 +1436,14 @@ vl_api_client_command (vlib_main_t * vm, if (regp) { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - health = "DEAD"; - } + if (regp->unanswered_pings > 0) + health = "questionable"; else - { - health = "alive"; - } - vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n", + health = "OK"; + + q = regp->vl_input_queue; + + vlib_cli_output (vm, "%20s %8d %14d 0x%016llx %s\n", regp->name, q->consumer_pid, q->cursize, q, health); } @@ -965,33 +1512,46 @@ vl_api_status_command (vlib_main_t * vm, } /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_command, static) = { - .path = "show api", - .short_help = "Show API information", +VLIB_CLI_COMMAND (cli_show_api_command, static) = +{ + .path = "show api", + .short_help = "Show API information", }; /* *INDENT-ON* */ +/*? + * Display binary api message allocation ring statistics +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_ring_command, static) = { - .path = "show api ring-stats", - .short_help = "Message ring statistics", - .function = vl_api_ring_command, +VLIB_CLI_COMMAND (cli_show_api_ring_command, static) = +{ + .path = "show api ring-stats", + .short_help = "Message ring statistics", + .function = vl_api_ring_command, }; /* *INDENT-ON* */ +/*? + * Display current api client connections +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_clients_command, static) = { - .path = "show api clients", - .short_help = "Client information", - .function = vl_api_client_command, +VLIB_CLI_COMMAND (cli_show_api_clients_command, static) = +{ + .path = "show api clients", + .short_help = "Client information", + .function = vl_api_client_command, }; /* *INDENT-ON* */ +/*? + * Display the current api message tracing status +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_status_command, static) = { - .path = "show api status", - .short_help = "Show API trace status", - .function = vl_api_status_command, +VLIB_CLI_COMMAND (cli_show_api_status_command, static) = +{ + .path = "show api trace-status", + .short_help = "Display API trace status", + .function = vl_api_status_command, }; /* *INDENT-ON* */ @@ -1034,11 +1594,15 @@ vl_api_message_table_command (vlib_main_t * vm, return 0; } +/*? + * Display the current api message decode tables +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) = { - .path = "show api message-table", - .short_help = "Message Table", - .function = vl_api_message_table_command, +VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) = +{ + .path = "show api message-table", + .short_help = "Message Table", + .function = vl_api_message_table_command, }; /* *INDENT-ON* */ @@ -1108,11 +1672,15 @@ configure: return 0; } +/*? + * Control the binary API trace mechanism +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (trace, static) = { - .path = "set api-trace", - .short_help = "API trace", - .function = vl_api_trace_command, +VLIB_CLI_COMMAND (trace, static) = +{ + .path = "set api-trace [on][on tx][on rx][off][free][debug on][debug off]", + .short_help = "API trace", + .function = vl_api_trace_command, }; /* *INDENT-ON* */ @@ -1121,6 +1689,7 @@ vlibmemory_init (vlib_main_t * vm) { api_main_t *am = &api_main; svm_map_region_args_t _a, *a = &_a; + clib_error_t *error; memset (a, 0, sizeof (*a)); a->root_path = am->root_path; @@ -1136,13 +1705,16 @@ vlibmemory_init (vlib_main_t * vm) 0) ? am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE; svm_region_init_args (a); - return 0; + + error = vlib_call_init_function (vm, vlibsocket_init); + + return error; } VLIB_INIT_FUNCTION (vlibmemory_init); void -vl_set_memory_region_name (char *name) +vl_set_memory_region_name (const char *name) { api_main_t *am = &api_main; @@ -1166,9 +1738,9 @@ format_api_msg_range (u8 * s, va_list * args) vl_api_msg_range_t *rp = va_arg (*args, vl_api_msg_range_t *); if (rp == 0) - s = format (s, "%-20s%9s%9s", "Name", "First-ID", "Last-ID"); + s = format (s, "%-50s%9s%9s", "Name", "First-ID", "Last-ID"); else - s = format (s, "%-20s%9d%9d", rp->name, rp->first_msg_id, + s = format (s, "%-50s%9d%9d", rp->name, rp->first_msg_id, rp->last_msg_id); return s; @@ -1199,21 +1771,27 @@ vl_api_show_plugin_command (vlib_main_t * vm, for (i = 0; i < vec_len (rp); i++) vlib_cli_output (vm, "%U", format_api_msg_range, rp + i); + vec_free (rp); + return 0; } +/*? + * Display the plugin binary API message range table +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { - .path = "show api plugin", - .short_help = "show api plugin", - .function = vl_api_show_plugin_command, +VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = +{ + .path = "show api plugin", + .short_help = "show api plugin", + .function = vl_api_show_plugin_command, }; /* *INDENT-ON* */ static void vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) { - vl_api_rpc_reply_t *rmp; + vl_api_rpc_call_reply_t *rmp; int (*fp) (void *); i32 rv = 0; vlib_main_t *vm = vlib_get_main (); @@ -1243,7 +1821,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) if (q) { rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY); rmp->context = mp->context; rmp->retval = rv; vl_msg_api_send_shmem (q, (u8 *) & rmp); @@ -1256,23 +1834,56 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) } static void -vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp) { clib_warning ("unimplemented"); } void -vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +vl_api_send_pending_rpc_requests (vlib_main_t * vm) { - vl_api_rpc_call_t *mp; api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q; + int i; + + /* + * Use the "normal" control-plane mechanism for the main thread. + * Well, almost. if the main input queue is full, we cannot + * block. Otherwise, we can expect a barrier sync timeout. + */ + q = shmem_hdr->vl_input_queue; + + for (i = 0; i < vec_len (vm->pending_rpc_requests); i++) + { + while (pthread_mutex_trylock (&q->mutex)) + vlib_worker_thread_barrier_check (); + + while (PREDICT_FALSE (unix_shared_memory_queue_is_full (q))) + { + pthread_mutex_unlock (&q->mutex); + vlib_worker_thread_barrier_check (); + while (pthread_mutex_trylock (&q->mutex)) + vlib_worker_thread_barrier_check (); + } + + vl_msg_api_send_shmem_nolock (q, (u8 *) (vm->pending_rpc_requests + i)); + + pthread_mutex_unlock (&q->mutex); + } + _vec_len (vm->pending_rpc_requests) = 0; +} + +always_inline void +vl_api_rpc_call_main_thread_inline (void *fp, u8 * data, u32 data_length, + u8 force_rpc) +{ + vl_api_rpc_call_t *mp; + vlib_main_t *vm = vlib_get_main (); - /* Main thread: call the function directly */ - if (os_get_cpu_number () == 0) + /* Main thread and not a forced RPC: call the function directly */ + if ((force_rpc == 0) && (vlib_get_thread_index () == 0)) { - vlib_main_t *vm = vlib_get_main (); void (*call_fp) (void *); vlib_worker_thread_barrier_sync (vm); @@ -1284,7 +1895,7 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) return; } - /* Any other thread, actually do an RPC call... */ + /* Otherwise, actually do an RPC */ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length); memset (mp, 0, sizeof (*mp)); @@ -1293,36 +1904,89 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) mp->function = pointer_to_uword (fp); mp->need_barrier_sync = 1; - /* - * Use the "normal" control-plane mechanism for the main thread. - * Well, almost. if the main input queue is full, we cannot - * block. Otherwise, we can expect a barrier sync timeout. - */ - q = shmem_hdr->vl_input_queue; + vec_add1 (vm->pending_rpc_requests, (uword) mp); +} + +/* + * Check if called from worker threads. + * If so, make rpc call of fp through shmem. + * Otherwise, call fp directly + */ +void +vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */ + 0); +} + +/* + * Always make rpc call of fp through shmem, useful for calling from threads + * not setup as worker threads, such as DPDK callback thread + */ +void +vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */ + 1); +} + +static void +vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) +{ + api_main_t *am = &api_main; + vl_api_msg_range_t *rp; + uword *p; - while (pthread_mutex_trylock (&q->mutex)) - vlib_worker_thread_barrier_check (); + /* Noop (except for tracing) during normal operation */ + if (am->replay_in_progress == 0) + return; - while (PREDICT_FALSE (unix_shared_memory_queue_is_full (q))) + p = hash_get_mem (am->msg_range_by_name, mp->plugin_name); + if (p == 0) { - pthread_mutex_unlock (&q->mutex); - vlib_worker_thread_barrier_check (); - while (pthread_mutex_trylock (&q->mutex)) - vlib_worker_thread_barrier_check (); + clib_warning ("WARNING: traced plugin '%s' not in current image", + mp->plugin_name); + return; } - vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + rp = vec_elt_at_index (am->msg_ranges, p[0]); + if (rp->first_msg_id != clib_net_to_host_u16 (mp->first_msg_id)) + { + clib_warning ("WARNING: traced plugin '%s' first message id %d not %d", + mp->plugin_name, clib_net_to_host_u16 (mp->first_msg_id), + rp->first_msg_id); + } - pthread_mutex_unlock (&q->mutex); + if (rp->last_msg_id != clib_net_to_host_u16 (mp->last_msg_id)) + { + clib_warning ("WARNING: traced plugin '%s' last message id %d not %d", + mp->plugin_name, clib_net_to_host_u16 (mp->last_msg_id), + rp->last_msg_id); + } } #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ -_(RPC_REPLY,rpc_reply) +_(RPC_CALL_REPLY,rpc_call_reply) + +#define foreach_plugin_trace_msg \ +_(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) + +/* + * Set the rpc callback at our earliest possible convenience. + * This avoids ordering issues between thread_init() -> start_workers and + * an init function which we could define here. If we ever intend to use + * vlib all by itself, we can't create a link-time dependency on + * an init function here and a typical "call foo_init first" + * guitar lick. + */ + +extern void *rpc_call_main_thread_cb_fn; static clib_error_t * rpc_api_hookup (vlib_main_t * vm) { + api_main_t *am = &api_main; #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ vl_api_##n##_t_handler, \ @@ -1332,11 +1996,806 @@ rpc_api_hookup (vlib_main_t * vm) sizeof(vl_api_##n##_t), 0 /* do not trace */); foreach_rpc_api_msg; #undef _ + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1 /* do trace */); + foreach_plugin_trace_msg; +#undef _ + + /* No reason to halt the parade to create a trace record... */ + am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1; + rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread; return 0; } VLIB_API_INIT_FUNCTION (rpc_api_hookup); +typedef enum +{ + DUMP, + CUSTOM_DUMP, + REPLAY, + INITIALIZERS, +} vl_api_replay_t; + +u8 * +format_vl_msg_api_trace_status (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); + vl_api_trace_t *tp; + char *trace_name; + + switch (which) + { + case VL_API_TRACE_TX: + tp = am->tx_trace; + trace_name = "TX trace"; + break; + + case VL_API_TRACE_RX: + tp = am->rx_trace; + trace_name = "RX trace"; + break; + + default: + abort (); + } + + if (tp == 0) + { + s = format (s, "%s: not yet configured.\n", trace_name); + return s; + } + + s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", + trace_name, vec_len (tp->traces), tp->nitems, + tp->enabled ? "is" : "is not", tp->wrapped ? "has" : "has not"); + return s; +} + +void vl_msg_api_custom_dump_configure (api_main_t * am) + __attribute__ ((weak)); +void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +} + +static void +vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, + u32 first_index, u32 last_index, + vl_api_replay_t which) +{ + vl_api_trace_file_header_t *hp; + int i, fd; + struct stat statb; + size_t file_size; + u8 *msg; + u8 endian_swap_needed = 0; + api_main_t *am = &api_main; + u8 *tmpbuf = 0; + u32 nitems; + void **saved_print_handlers = 0; + + fd = open ((char *) filename, O_RDONLY); + + if (fd < 0) + { + vlib_cli_output (vm, "Couldn't open %s\n", filename); + return; + } + + if (fstat (fd, &statb) < 0) + { + vlib_cli_output (vm, "Couldn't stat %s\n", filename); + close (fd); + return; + } + + if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) + { + vlib_cli_output (vm, "File not plausible: %s\n", filename); + close (fd); + return; + } + + file_size = statb.st_size; + file_size = (file_size + 4095) & ~(4096); + + hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) + { + vlib_cli_output (vm, "mmap failed: %s\n", filename); + close (fd); + return; + } + close (fd); + + if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) + || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) + endian_swap_needed = 1; + + if (endian_swap_needed) + nitems = ntohl (hp->nitems); + else + nitems = hp->nitems; + + if (last_index == (u32) ~ 0) + { + last_index = nitems - 1; + } + + if (first_index >= nitems || last_index >= nitems) + { + vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", + first_index, last_index, nitems - 1); + munmap (hp, file_size); + return; + } + if (hp->wrapped) + vlib_cli_output (vm, + "Note: wrapped/incomplete trace, results may vary\n"); + + if (which == CUSTOM_DUMP) + { + saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); + vl_msg_api_custom_dump_configure (am); + } + + + msg = (u8 *) (hp + 1); + + for (i = 0; i < first_index; i++) + { + trace_cfg_t *cfgp; + int size; + u16 msg_id; + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + return; + } + msg += size; + } + + if (which == REPLAY) + am->replay_in_progress = 1; + + for (; i <= last_index; i++) + { + trace_cfg_t *cfgp; + u16 *msg_idp; + u16 msg_id; + int size; + + if (which == DUMP) + vlib_cli_output (vm, "---------- trace %d -----------\n", i); + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + + /* Copy the buffer (from the read-only mmap'ed file) */ + vec_validate (tmpbuf, size - 1 + sizeof (uword)); + clib_memcpy (tmpbuf + sizeof (uword), msg, size); + memset (tmpbuf, 0xf, sizeof (uword)); + + /* + * Endian swap if needed. All msg data is supposed to be + * in network byte order. All msg handlers are supposed to + * know that. The generic message dumpers don't know that. + * One could fix apigen, I suppose. + */ + if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) + { + void (*endian_fp) (void *); + if (msg_id >= vec_len (am->msg_endian_handlers) + || (am->msg_endian_handlers[msg_id] == 0)) + { + vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + endian_fp = am->msg_endian_handlers[msg_id]; + (*endian_fp) (tmpbuf + sizeof (uword)); + } + + /* msg_id always in network byte order */ + if (clib_arch_is_little_endian) + { + msg_idp = (u16 *) (tmpbuf + sizeof (uword)); + *msg_idp = msg_id; + } + + switch (which) + { + case CUSTOM_DUMP: + case DUMP: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + (*print_fp) (tmpbuf + sizeof (uword), vm); + } + else + { + vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", + msg_id); + break; + } + break; + + case INITIALIZERS: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *s; + int j; + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + + vlib_cli_output (vm, "/*"); + + (*print_fp) (tmpbuf + sizeof (uword), vm); + vlib_cli_output (vm, "*/\n"); + + s = format (0, "static u8 * vl_api_%s_%d[%d] = {", + am->msg_names[msg_id], i, + am->api_trace_cfg[msg_id].size); + + for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) + { + if ((j & 7) == 0) + s = format (s, "\n "); + s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); + } + s = format (s, "\n};\n%c", 0); + vlib_cli_output (vm, (char *) s); + vec_free (s); + } + break; + + case REPLAY: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id] && cfgp->replay_enable) + { + void (*handler) (void *); + + handler = (void *) am->msg_handlers[msg_id]; + + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_sync (); + (*handler) (tmpbuf + sizeof (uword)); + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_release (); + } + else + { + if (cfgp->replay_enable) + vlib_cli_output (vm, "Skipping msg id %d: no handler\n", + msg_id); + break; + } + break; + } + + _vec_len (tmpbuf) = 0; + msg += size; + } + + if (saved_print_handlers) + { + clib_memcpy (am->msg_print_handlers, saved_print_handlers, + vec_len (am->msg_print_handlers) * sizeof (void *)); + vec_free (saved_print_handlers); + } + + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; +} + +static clib_error_t * +api_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 nitems = 256 << 10; + api_main_t *am = &api_main; + vl_api_trace_which_t which = VL_API_TRACE_RX; + u8 *filename; + u32 first = 0; + u32 last = (u32) ~ 0; + FILE *fp; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, which, 0); + } + else if (unformat (input, "save %s", &filename)) + { + u8 *chroot_filename; + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + return 0; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + + vec_free (filename); + + fp = fopen ((char *) chroot_filename, "w"); + if (fp == NULL) + { + vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); + return 0; + } + rv = vl_msg_api_trace_save (am, which, fp); + fclose (fp); + if (rv == -1) + vlib_cli_output (vm, "API Trace data not present\n"); + else if (rv == -2) + vlib_cli_output (vm, "File for writing is closed\n"); + else if (rv == -10) + vlib_cli_output (vm, "Error while writing header to file\n"); + else if (rv == -11) + vlib_cli_output (vm, "Error while writing trace to file\n"); + else if (rv == -12) + vlib_cli_output (vm, + "Error while writing end of buffer trace to file\n"); + else if (rv == -13) + vlib_cli_output (vm, + "Error while writing start of buffer trace to file\n"); + else if (rv < 0) + vlib_cli_output (vm, "Unkown error while saving: %d", rv); + else + vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); + vec_free (chroot_filename); + } + else if (unformat (input, "dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, DUMP); + } + else if (unformat (input, "custom-dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); + } + else if (unformat (input, "replay %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, REPLAY); + } + else if (unformat (input, "initializers %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); + } + else if (unformat (input, "tx")) + { + which = VL_API_TRACE_TX; + } + else if (unformat (input, "first %d", &first)) + { + ; + } + else if (unformat (input, "last %d", &last)) + { + ; + } + else if (unformat (input, "status")) + { + vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, + am, which); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, which, 0); + vl_msg_api_trace_free (am, which); + } + else if (unformat (input, "post-mortem-on")) + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + else if (unformat (input, "post-mortem-off")) + vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ ); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/*? + * Display, replay, or save a binary API trace +?*/ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_trace_command, static) = +{ + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +api_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + u32 nitems = 256 << 10; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + } + else if (unformat (input, "save-api-table %s", + &am->save_msg_table_filename)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/*? + * This module has three configuration parameters: + * "on" or "enable" - enables binary api tracing + * "nitems " - sets the size of the circular buffer to + * "save-api-table " - dumps the API message table to /tmp/ +?*/ +VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); + +static clib_error_t * +api_queue_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + api_main_t *am = &api_main; + u32 nitems; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "length %d", &nitems) || + (unformat (input, "len %d", &nitems))) + { + if (nitems >= 1024) + am->vlib_input_queue_length = nitems; + else + clib_warning ("vlib input queue length %d too small, ignored", + nitems); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_queue_config_fn, "api-queue"); + +static u8 * +extract_name (u8 * s) +{ + u8 *rv; + + rv = vec_dup (s); + + while (vec_len (rv) && rv[vec_len (rv)] != '_') + _vec_len (rv)--; + + rv[vec_len (rv)] = 0; + + return rv; +} + +static u8 * +extract_crc (u8 * s) +{ + int i; + u8 *rv; + + rv = vec_dup (s); + + for (i = vec_len (rv) - 1; i >= 0; i--) + { + if (rv[i] == '_') + { + vec_delete (rv, i + 1, 0); + break; + } + } + return rv; +} + +typedef struct +{ + u8 *name_and_crc; + u8 *name; + u8 *crc; + u32 msg_index; + int which; +} msg_table_unserialize_t; + +static int +table_id_cmp (void *a1, void *a2) +{ + msg_table_unserialize_t *n1 = a1; + msg_table_unserialize_t *n2 = a2; + + return (n1->msg_index - n2->msg_index); +} + +static int +table_name_and_crc_cmp (void *a1, void *a2) +{ + msg_table_unserialize_t *n1 = a1; + msg_table_unserialize_t *n2 = a2; + + return strcmp ((char *) n1->name_and_crc, (char *) n2->name_and_crc); +} + +static clib_error_t * +dump_api_table_file_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *filename = 0; + api_main_t *am = &api_main; + serialize_main_t _sm, *sm = &_sm; + clib_error_t *error; + u32 nmsgs; + u32 msg_index; + u8 *name_and_crc; + int compare_current = 0; + int numeric_sort = 0; + msg_table_unserialize_t *table = 0, *item; + u32 i; + u32 ndifferences = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "file %s", &filename)) + ; + else if (unformat (input, "compare-current") + || unformat (input, "compare")) + compare_current = 1; + else if (unformat (input, "numeric")) + numeric_sort = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (numeric_sort && compare_current) + return clib_error_return + (0, "Comparison and numeric sorting are incompatible"); + + if (filename == 0) + return clib_error_return (0, "File not specified"); + + /* Load the serialized message table from the table dump */ + + error = unserialize_open_clib_file (sm, (char *) filename); + + if (error) + return error; + + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + vec_add2 (table, item, 1); + item->msg_index = msg_index; + item->name_and_crc = name_and_crc; + item->name = extract_name (name_and_crc); + item->crc = extract_crc (name_and_crc); + item->which = 0; /* file */ + } + serialize_close (sm); + + /* Compare with the current image? */ + if (compare_current) + { + /* Append the current message table */ + u8 *tblv = vl_api_serialize_message_table (am, 0); + + serialize_open_vector (sm, tblv); + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + + vec_add2 (table, item, 1); + item->msg_index = msg_index; + item->name_and_crc = name_and_crc; + item->name = extract_name (name_and_crc); + item->crc = extract_crc (name_and_crc); + item->which = 1; /* current_image */ + } + vec_free (tblv); + } + + /* Sort the table. */ + if (numeric_sort) + vec_sort_with_function (table, table_id_cmp); + else + vec_sort_with_function (table, table_name_and_crc_cmp); + + if (compare_current) + { + ndifferences = 0; + + /* + * In this case, the recovered table will have two entries per + * API message. So, if entries i and i+1 match, the message definitions + * are identical. Otherwise, the crc is different, or a message is + * present in only one of the tables. + */ + vlib_cli_output (vm, "%=60s %s", "Message Name", "Result"); + + for (i = 0; i < vec_len (table);) + { + /* Last message lonely? */ + if (i == vec_len (table) - 1) + { + ndifferences++; + goto last_unique; + } + + /* Identical pair? */ + if (!strncmp + ((char *) table[i].name_and_crc, + (char *) table[i + 1].name_and_crc, + vec_len (table[i].name_and_crc))) + { + i += 2; + continue; + } + + ndifferences++; + + /* Only in one of two tables? */ + if (strncmp ((char *) table[i].name, (char *) table[i + 1].name, + vec_len (table[i].name))) + { + last_unique: + vlib_cli_output (vm, "%-60s only in %s", + table[i].name, table[i].which ? + "image" : "file"); + i++; + continue; + } + /* In both tables, but with different signatures */ + vlib_cli_output (vm, "%-60s definition changed", table[i].name); + i += 2; + } + if (ndifferences == 0) + vlib_cli_output (vm, "No api message signature differences found."); + else + vlib_cli_output (vm, "Found %u api message signature differences", + ndifferences); + goto cleanup; + } + + /* Dump the table, sorted as shown above */ + vlib_cli_output (vm, "%=60s %=8s %=10s", "Message name", "MsgID", "CRC"); + + for (i = 0; i < vec_len (table); i++) + { + item = table + i; + vlib_cli_output (vm, "%-60s %8u %10s", item->name, + item->msg_index, item->crc); + } + +cleanup: + for (i = 0; i < vec_len (table); i++) + { + vec_free (table[i].name_and_crc); + vec_free (table[i].name); + vec_free (table[i].crc); + } + + vec_free (table); + + return 0; +} + +/*? + * Displays a serialized API message decode table, sorted by message name + * + * @cliexpar + * @cliexstart{show api dump file } + * Message name MsgID CRC + * accept_session 407 8e2a127e + * accept_session_reply 408 67d8c22a + * add_node_next 549 e4202993 + * add_node_next_reply 550 e89d6eed + * etc. + * @cliexend +?*/ + +/*? + * Compares a serialized API message decode table with the current image + * + * @cliexpar + * @cliexstart{show api dump file compare} + * ip_add_del_route definition changed + * ip_table_add_del definition changed + * l2_macs_event only in image + * vnet_ip4_fib_counters only in file + * vnet_ip4_nbr_counters only in file + * @cliexend +?*/ + +/*? + * Display a serialized API message decode table, compare a saved + * decode table with the current image, to establish API differences. + * +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (dump_api_table_file, static) = +{ + .path = "show api dump", + .short_help = "show api dump file [numeric | compare-current]", + .function = dump_api_table_file_command_fn, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON *