X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fsvm%2Ffifo_segment.c;h=2ff272e2f8812ca9172d2cfed659d13c3efffc29;hb=afbb33aaf254ebfe6ebf2477b1a4859cec40e34b;hp=d30932448c62131a639ffb5b2b7bc111f91c9ee3;hpb=0bc78d80363efc22d07171473933d1b0016440e4;p=vpp.git diff --git a/src/svm/fifo_segment.c b/src/svm/fifo_segment.c index d30932448c6..2ff272e2f88 100644 --- a/src/svm/fifo_segment.c +++ b/src/svm/fifo_segment.c @@ -105,13 +105,14 @@ fsh_n_active_fifos (fifo_segment_header_t * fsh) } static inline uword -fsh_virtual_mem (fifo_segment_header_t * fsh) +fs_virtual_mem (fifo_segment_t *fs) { + fifo_segment_header_t *fsh = fs->h; fifo_segment_slice_t *fss; uword total_vm = 0; int i; - for (i = 0; i < fsh->n_slices; i++) + for (i = 0; i < fs->n_slices; i++) { fss = fsh_slice_get (fsh, i); total_vm += clib_atomic_load_relax_n (&fss->virtual_mem); @@ -127,31 +128,22 @@ fsh_virtual_mem_update (fifo_segment_header_t * fsh, u32 slice_index, fss->virtual_mem += n_bytes; } -static inline void -fss_chunk_freelist_lock (fifo_segment_slice_t *fss) +static inline int +fss_chunk_fl_index_is_valid (fifo_segment_slice_t *fss, u32 fl_index) { - u32 free = 0; - while (!clib_atomic_cmp_and_swap_acq_relax_n (&fss->chunk_lock, &free, 1, 0)) - { - /* atomic load limits number of compare_exchange executions */ - while (clib_atomic_load_relax_n (&fss->chunk_lock)) - CLIB_PAUSE (); - /* on failure, compare_exchange writes (*p)->lock into free */ - free = 0; - } + return (fl_index < FS_CHUNK_VEC_LEN); } -static inline void -fss_chunk_freelist_unlock (fifo_segment_slice_t *fss) -{ - /* Make sure all reads/writes are complete before releasing the lock */ - clib_atomic_release (&fss->chunk_lock); -} +#define FS_CL_HEAD_MASK 0xFFFFFFFFFFFF +#define FS_CL_HEAD_TMASK 0xFFFF000000000000 +#define FS_CL_HEAD_TINC (1ULL << 48) -static inline int -fss_chunk_fl_index_is_valid (fifo_segment_slice_t * fss, u32 fl_index) +static svm_fifo_chunk_t * +fss_chunk_free_list_head (fifo_segment_header_t *fsh, + fifo_segment_slice_t *fss, u32 fl_index) { - return (fl_index < FS_CHUNK_VEC_LEN); + fs_sptr_t headsp = clib_atomic_load_relax_n (&fss->free_chunks[fl_index]); + return fs_chunk_ptr (fsh, headsp & FS_CL_HEAD_MASK); } static void @@ -159,10 +151,20 @@ fss_chunk_free_list_push (fifo_segment_header_t *fsh, fifo_segment_slice_t *fss, u32 fl_index, svm_fifo_chunk_t *c) { - fss_chunk_freelist_lock (fss); - c->next = fss->free_chunks[fl_index]; - fss->free_chunks[fl_index] = fs_chunk_sptr (fsh, c); - fss_chunk_freelist_unlock (fss); + fs_sptr_t old_head, new_head, csp; + + csp = fs_chunk_sptr (fsh, c); + ASSERT (csp <= FS_CL_HEAD_MASK); + old_head = clib_atomic_load_acq_n (&fss->free_chunks[fl_index]); + + do + { + c->next = old_head & FS_CL_HEAD_MASK; + new_head = csp + ((old_head + FS_CL_HEAD_TINC) & FS_CL_HEAD_TMASK); + } + while (!__atomic_compare_exchange (&fss->free_chunks[fl_index], &old_head, + &new_head, 0 /* weak */, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE)); } static void @@ -170,32 +172,50 @@ fss_chunk_free_list_push_list (fifo_segment_header_t *fsh, fifo_segment_slice_t *fss, u32 fl_index, svm_fifo_chunk_t *head, svm_fifo_chunk_t *tail) { - fss_chunk_freelist_lock (fss); - tail->next = fss->free_chunks[fl_index]; - fss->free_chunks[fl_index] = fs_chunk_sptr (fsh, head); - fss_chunk_freelist_unlock (fss); + fs_sptr_t old_head, new_head, headsp; + + headsp = fs_chunk_sptr (fsh, head); + ASSERT (headsp <= FS_CL_HEAD_MASK); + old_head = clib_atomic_load_acq_n (&fss->free_chunks[fl_index]); + + do + { + tail->next = old_head & FS_CL_HEAD_MASK; + new_head = headsp + ((old_head + FS_CL_HEAD_TINC) & FS_CL_HEAD_TMASK); + } + while (!__atomic_compare_exchange (&fss->free_chunks[fl_index], &old_head, + &new_head, 0 /* weak */, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE)); } static svm_fifo_chunk_t * fss_chunk_free_list_pop (fifo_segment_header_t *fsh, fifo_segment_slice_t *fss, u32 fl_index) { + fs_sptr_t old_head, new_head; svm_fifo_chunk_t *c; ASSERT (fss_chunk_fl_index_is_valid (fss, fl_index)); - fss_chunk_freelist_lock (fss); + old_head = clib_atomic_load_acq_n (&fss->free_chunks[fl_index]); - if (!fss->free_chunks[fl_index]) + /* Lock-free stacks are affected by ABA if a side allocates a chunk and + * shortly thereafter frees it. To circumvent that, reuse the upper bits + * of the head of the list shared pointer, i.e., offset to where the chunk + * is, as a tag. The tag is incremented with each push/pop operation and + * therefore collisions can only happen if an element is popped and pushed + * exactly after a complete wrap of the tag (16 bits). It's unlikely either + * of the sides will be descheduled for that long */ + do { - fss_chunk_freelist_unlock (fss); - return 0; + if (!(old_head & FS_CL_HEAD_MASK)) + return 0; + c = fs_chunk_ptr (fsh, old_head & FS_CL_HEAD_MASK); + new_head = c->next + ((old_head + FS_CL_HEAD_TINC) & FS_CL_HEAD_TMASK); } - - c = fs_chunk_ptr (fsh, fss->free_chunks[fl_index]); - fss->free_chunks[fl_index] = c->next; - - fss_chunk_freelist_unlock (fss); + while (!__atomic_compare_exchange (&fss->free_chunks[fl_index], &old_head, + &new_head, 0 /* weak */, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE)); return c; } @@ -292,6 +312,7 @@ fifo_segment_init (fifo_segment_t * fs) seg_start = round_pow2_u64 (pointer_to_uword (seg_data), align); fsh = uword_to_pointer (seg_start, void *); + CLIB_MEM_UNPOISON (fsh, seg_sz); memset (fsh, 0, sizeof (*fsh) + slices_sz); fsh->byte_index = sizeof (*fsh) + slices_sz; @@ -301,6 +322,7 @@ fifo_segment_init (fifo_segment_t * fs) fsh->max_log2_fifo_size = min_log2 (max_fifo); fsh->n_cached_bytes = 0; fsh->n_reserved_bytes = fsh->byte_index; + fsh->start_byte_index = fsh->byte_index; ASSERT (fsh->max_byte_index <= sh->ssvm_size - offset); fs->max_byte_index = fsh->max_byte_index; @@ -416,6 +438,14 @@ fifo_segment_get_segment (fifo_segment_main_t * sm, u32 segment_index) return pool_elt_at_index (sm->segments, segment_index); } +fifo_segment_t * +fifo_segment_get_segment_if_valid (fifo_segment_main_t *sm, u32 segment_index) +{ + if (pool_is_free_index (sm->segments, segment_index)) + return 0; + return pool_elt_at_index (sm->segments, segment_index); +} + void fifo_segment_info (fifo_segment_t * seg, char **address, size_t * size) { @@ -668,7 +698,8 @@ free_list: if (data_bytes <= fss_fl_chunk_bytes (fss) + n_free) { u32 min_size = FIFO_SEGMENT_MIN_FIFO_SIZE; - + if (n_free < min_size) + goto done; batch = (data_bytes - fss_fl_chunk_bytes (fss)) / min_size; batch = clib_min (batch + 1, n_free / min_size); if (fsh_try_alloc_chunk_batch (fsh, fss, 0, batch)) @@ -781,11 +812,13 @@ fs_fifo_alloc (fifo_segment_t *fs, u32 slice_index) } void -fs_fifo_free (fifo_segment_t *fs, svm_fifo_t *f) +fs_fifo_free (fifo_segment_t *fs, svm_fifo_t *f, u32 slice_index) { - u32 slice_index = f->shr->slice_index; fifo_slice_private_t *pfss; + if (CLIB_DEBUG) + clib_memset (f, 0xfc, sizeof (*f)); + pfss = &fs->slices[slice_index]; clib_mem_bulk_free (pfss->fifos, f); } @@ -799,8 +832,10 @@ fifo_segment_cleanup (fifo_segment_t *fs) for (slice_index = 0; slice_index < fs->n_slices; slice_index++) clib_mem_bulk_destroy (fs->slices[slice_index].fifos); + vec_free (fs->slices); + vec_foreach (fs->mqs, mq) - vec_free (mq->rings); + svm_msg_q_cleanup (mq); vec_free (fs->mqs); } @@ -869,6 +904,14 @@ fifo_segment_alloc_fifo_w_offset (fifo_segment_t *fs, uword offset) return f; } +svm_fifo_t * +fifo_segment_duplicate_fifo (fifo_segment_t *fs, svm_fifo_t *f) +{ + svm_fifo_t *nf = fs_fifo_alloc (fs, 0); + clib_memcpy (nf, f, sizeof (*f)); + return nf; +} + /** * Free fifo allocated in fifo segment */ @@ -927,19 +970,24 @@ fifo_segment_free_fifo (fifo_segment_t * fs, svm_fifo_t * f) f->ooo_enq = f->ooo_deq = 0; f->prev = 0; - fs_fifo_free (fs, f); + fs_fifo_free (fs, f, f->shr->slice_index); fsh_active_fifos_update (fsh, -1); } +void +fifo_segment_free_client_fifo (fifo_segment_t *fs, svm_fifo_t *f) +{ + fs_fifo_free (fs, f, 0 /* clients attach fifos in slice 0 */); +} + void fifo_segment_detach_fifo (fifo_segment_t *fs, svm_fifo_t **f) { fifo_slice_private_t *pfss; fifo_segment_slice_t *fss; - u32 fl_index, slice_index; - svm_fifo_chunk_t **c; svm_fifo_t *of = *f; + u32 slice_index; slice_index = of->master_thread_index; fss = fsh_slice_get (fs->h, slice_index); @@ -948,13 +996,13 @@ fifo_segment_detach_fifo (fifo_segment_t *fs, svm_fifo_t **f) if (of->flags & SVM_FIFO_F_LL_TRACKED) pfss_fifo_del_active_list (pfss, of); - /* Update slice counts for chunks that were detached */ - vec_foreach (c, of->chunks_at_attach) - { - fl_index = fs_freelist_for_size ((*c)->length); - clib_atomic_fetch_sub_rel (&fss->num_chunks[fl_index], 1); - } - vec_free (of->chunks_at_attach); + /* Collect chunks that were provided in return for those detached */ + fsh_slice_collect_chunks (fs->h, fss, of->chunks_at_attach); + of->chunks_at_attach = 0; + + /* Collect hdr that was provided in return for the detached */ + fss_fifo_free_list_push (fs->h, fss, of->hdr_at_attach); + of->hdr_at_attach = 0; clib_mem_bulk_free (pfss->fifos, *f); *f = 0; @@ -963,11 +1011,10 @@ fifo_segment_detach_fifo (fifo_segment_t *fs, svm_fifo_t **f) void fifo_segment_attach_fifo (fifo_segment_t *fs, svm_fifo_t **f, u32 slice_index) { + svm_fifo_chunk_t *c, *nc, *pc = 0; fifo_slice_private_t *pfss; fifo_segment_slice_t *fss; - svm_fifo_chunk_t *c; svm_fifo_t *nf, *of; - u32 fl_index; nf = fs_fifo_alloc (fs, slice_index); clib_memcpy_fast (nf, *f, sizeof (*nf)); @@ -975,21 +1022,23 @@ fifo_segment_attach_fifo (fifo_segment_t *fs, svm_fifo_t **f, u32 slice_index) fss = fsh_slice_get (fs->h, slice_index); pfss = fs_slice_private_get (fs, slice_index); fss->virtual_mem += svm_fifo_size (nf); + nf->next = nf->prev = 0; if (nf->flags & SVM_FIFO_F_LL_TRACKED) pfss_fifo_add_active_list (pfss, nf); - /* Update allocated chunks for fifo segment and build list - * of chunks to be freed at detach */ + /* Allocate shared hdr and chunks to be collected at detach in return + * for those that are being attached now */ of = *f; - of->chunks_at_attach = 0; + of->hdr_at_attach = fsh_try_alloc_fifo_hdr (fs->h, fss); c = fs_chunk_ptr (fs->h, nf->shr->start_chunk); - while (c) + of->chunks_at_attach = pc = fsh_try_alloc_chunk (fs->h, fss, c->length); + + while ((c = fs_chunk_ptr (fs->h, c->next))) { - fl_index = fs_freelist_for_size (c->length); - clib_atomic_fetch_add_rel (&fss->num_chunks[fl_index], 1); - vec_add1 (of->chunks_at_attach, c); - c = fs_chunk_ptr (fs->h, c->next); + nc = fsh_try_alloc_chunk (fs->h, fss, c->length); + pc->next = fs_chunk_sptr (fs->h, nc); + pc = nc; } nf->shr->slice_index = slice_index; @@ -1002,6 +1051,30 @@ fifo_segment_fifo_offset (svm_fifo_t *f) return (u8 *) f->shr - (u8 *) f->fs_hdr; } +svm_fifo_chunk_t * +fifo_segment_alloc_chunk_w_slice (fifo_segment_t *fs, u32 slice_index, + u32 chunk_size) +{ + fifo_segment_header_t *fsh = fs->h; + fifo_segment_slice_t *fss; + + fss = fsh_slice_get (fsh, slice_index); + return fsh_try_alloc_chunk (fsh, fss, chunk_size); +} + +void +fifo_segment_collect_chunk (fifo_segment_t *fs, u32 slice_index, + svm_fifo_chunk_t *c) +{ + fsh_collect_chunks (fs->h, slice_index, c); +} + +uword +fifo_segment_chunk_offset (fifo_segment_t *fs, svm_fifo_chunk_t *c) +{ + return (u8 *) c - (u8 *) fs->h; +} + svm_msg_q_t * fifo_segment_msg_q_alloc (fifo_segment_t *fs, u32 mq_index, svm_msg_q_cfg_t *cfg) @@ -1042,7 +1115,7 @@ fifo_segment_msg_q_attach (fifo_segment_t *fs, uword offset, u32 mq_index) mq = vec_elt_at_index (fs->mqs, mq_index); - if (!mq->q) + if (!mq->q.shr) { svm_msg_q_shared_t *smq; smq = (svm_msg_q_shared_t *) ((u8 *) fs->h + offset); @@ -1054,15 +1127,48 @@ fifo_segment_msg_q_attach (fifo_segment_t *fs, uword offset, u32 mq_index) return mq; } +void +fifo_segment_msg_qs_discover (fifo_segment_t *fs, int *fds, u32 n_fds) +{ + svm_msg_q_shared_t *smq; + u32 n_mqs, size, i; + uword offset = 0, n_alloced; + svm_msg_q_t *mq; + + n_mqs = fs->h->n_mqs; + if (n_fds && n_mqs != n_fds) + { + clib_warning ("expected %u fds got %u", n_mqs, n_fds); + return; + } + + vec_validate (fs->mqs, n_mqs - 1); + n_alloced = fs->h->n_reserved_bytes - fs->h->start_byte_index; + ASSERT (n_alloced % n_mqs == 0); + size = n_alloced / n_mqs; + + offset = fs->h->start_byte_index; + for (i = 0; i < n_mqs; i++) + { + mq = vec_elt_at_index (fs->mqs, i); + smq = (svm_msg_q_shared_t *) ((u8 *) fs->h + offset); + svm_msg_q_attach (mq, smq); + if (n_fds) + svm_msg_q_set_eventfd (mq, fds[i]); + offset += size; + } +} + uword fifo_segment_msg_q_offset (fifo_segment_t *fs, u32 mq_index) { svm_msg_q_t *mq = vec_elt_at_index (fs->mqs, mq_index); - if (mq->q == 0) + if (mq->q.shr == 0) return ~0ULL; - return (uword) ((u8 *) mq->q - (u8 *) fs->h) - sizeof (svm_msg_q_shared_t); + return (uword) ((u8 *) mq->q.shr - (u8 *) fs->h) - + sizeof (svm_msg_q_shared_t); } int @@ -1218,7 +1324,7 @@ fs_slice_num_free_chunks (fifo_segment_header_t *fsh, { for (i = 0; i < FS_CHUNK_VEC_LEN; i++) { - c = fs_chunk_ptr (fsh, fss->free_chunks[i]); + c = fss_chunk_free_list_head (fsh, fss, i); if (c == 0) continue; @@ -1237,7 +1343,7 @@ fs_slice_num_free_chunks (fifo_segment_header_t *fsh, if (fl_index >= FS_CHUNK_VEC_LEN) return 0; - c = fs_chunk_ptr (fsh, fss->free_chunks[fl_index]); + c = fss_chunk_free_list_head (fsh, fss, fl_index); if (c == 0) return 0; @@ -1271,16 +1377,16 @@ fifo_segment_size (fifo_segment_t * fs) return fs->h->max_byte_index - fs->h->n_reserved_bytes; } -u8 -fsh_has_reached_mem_limit (fifo_segment_header_t * fsh) +static u8 +fs_has_reached_mem_limit (fifo_segment_t *fs) { - return (fsh->flags & FIFO_SEGMENT_F_MEM_LIMIT) ? 1 : 0; + return (fs->flags & FIFO_SEGMENT_F_MEM_LIMIT) ? 1 : 0; } -void -fsh_reset_mem_limit (fifo_segment_header_t * fsh) +static void +fs_reset_mem_limit (fifo_segment_t *fs) { - fsh->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT; + fs->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT; } void * @@ -1355,26 +1461,26 @@ fifo_segment_get_mem_usage (fifo_segment_t * fs) } fifo_segment_mem_status_t -fifo_segment_determine_status (fifo_segment_header_t * fsh, u8 usage) +fifo_segment_determine_status (fifo_segment_t *fs, u8 usage) { - if (!fsh->high_watermark || !fsh->low_watermark) + if (!fs->high_watermark || !fs->low_watermark) return MEMORY_PRESSURE_NO_PRESSURE; /* once the no-memory is detected, the status continues * until memory usage gets below the high watermark */ - if (fsh_has_reached_mem_limit (fsh)) + if (fs_has_reached_mem_limit (fs)) { - if (usage >= fsh->high_watermark) + if (usage >= fs->high_watermark) return MEMORY_PRESSURE_NO_MEMORY; else - fsh_reset_mem_limit (fsh); + fs_reset_mem_limit (fs); } - if (usage >= fsh->high_watermark) + if (usage >= fs->high_watermark) return MEMORY_PRESSURE_HIGH_PRESSURE; - else if (usage >= fsh->low_watermark) + else if (usage >= fs->low_watermark) return MEMORY_PRESSURE_LOW_PRESSURE; return MEMORY_PRESSURE_NO_PRESSURE; @@ -1383,10 +1489,9 @@ fifo_segment_determine_status (fifo_segment_header_t * fsh, u8 usage) fifo_segment_mem_status_t fifo_segment_get_mem_status (fifo_segment_t * fs) { - fifo_segment_header_t *fsh = fs->h; u8 usage = fifo_segment_get_mem_usage (fs); - return fifo_segment_determine_status (fsh, usage); + return fifo_segment_determine_status (fs, usage); } u8 * @@ -1466,7 +1571,7 @@ format_fifo_segment (u8 * s, va_list * args) fss = fsh_slice_get (fsh, slice_index); for (i = 0; i < FS_CHUNK_VEC_LEN; i++) { - c = fs_chunk_ptr (fsh, fss->free_chunks[i]); + c = fss_chunk_free_list_head (fsh, fss, i); if (c == 0 && fss->num_chunks[i] == 0) continue; count = 0; @@ -1493,7 +1598,7 @@ format_fifo_segment (u8 * s, va_list * args) in_use = fifo_segment_size (fs) - est_free_seg_bytes - tracked_cached_bytes; usage = (100.0 * in_use) / allocated; mem_st = fifo_segment_get_mem_status (fs); - virt = fsh_virtual_mem (fsh); + virt = fs_virtual_mem (fs); reserved = fsh->n_reserved_bytes; s = format (s, "\n%Useg free bytes: %U (%lu) estimated: %U (%lu) reserved:" @@ -1506,8 +1611,8 @@ format_fifo_segment (u8 * s, va_list * args) format_memory_size, chunk_bytes, chunk_bytes, format_memory_size, est_chunk_bytes, est_chunk_bytes, format_memory_size, tracked_cached_bytes, tracked_cached_bytes); - s = format (s, "%Ufifo active: %u hdr free bytes: %U (%u) \n", - format_white_space, indent + 2, fsh->n_active_fifos, + s = format (s, "%Ufifo active: %u hdr free: %u bytes: %U (%u) \n", + format_white_space, indent + 2, fsh->n_active_fifos, free_fifos, format_memory_size, fifo_hdr, fifo_hdr); s = format (s, "%Usegment usage: %.2f%% (%U / %U) virt: %U status: %s\n", format_white_space, indent + 2, usage, format_memory_size,