session_wrk_send_evt_to_main (session_worker_t *wrk, session_evt_elt_t *elt)
{
session_evt_elt_t *he;
- u32 thread_index;
+ uword thread_index;
u8 is_empty;
thread_index = wrk->vm->thread_index;
is_empty = clib_llist_is_empty (wrk->event_elts, evt_list, he);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
if (is_empty)
- vlib_rpc_call_main_thread (session_wrk_handle_evts_main_rpc,
- (u8 *) &thread_index, sizeof (thread_index));
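+ /* Thread index fits in a uword, so pass it by value in the rpc argument
+ * pointer; decoded with pointer_to_uword () on the main side */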
+ session_send_rpc_evt_to_thread (0, session_wrk_handle_evts_main_rpc,
+ uword_to_pointer (thread_index, void *));
}
session_mq_handle_connects_rpc (void *arg)
{
u32 max_connects = 32, n_connects = 0;
- vlib_main_t *vm = vlib_get_main ();
session_evt_elt_t *he, *elt, *next;
- session_worker_t *fwrk, *wrk;
+ session_worker_t *fwrk;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
/* Pending connects on linked list pertaining to first worker */
- fwrk = session_main_get_worker (1);
+ fwrk = session_main_get_worker (transport_cl_thread ());
if (!fwrk->n_pending_connects)
- goto update_state;
-
- vlib_worker_thread_barrier_sync (vm);
+ return;
he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects);
elt = clib_llist_next (fwrk->event_elts, evt_list, he);
- /* Avoid holding the barrier for too long */
+ /* Avoid holding the worker for too long */
while (n_connects < max_connects && elt != he)
{
next = clib_llist_next (fwrk->event_elts, evt_list, elt);
/* Decrement with worker barrier */
fwrk->n_pending_connects -= n_connects;
-
- vlib_worker_thread_barrier_release (vm);
-
-update_state:
-
- /* Switch worker to poll mode if it was in interrupt mode and had work or
- * back to interrupt if threshold of loops without a connect is passed.
- * While in poll mode, reprogram connects rpc */
- wrk = session_main_get_worker (0);
- if (wrk->state != SESSION_WRK_POLLING)
+ if (fwrk->n_pending_connects > 0)
{
- if (n_connects)
- {
- session_wrk_set_state (wrk, SESSION_WRK_POLLING);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
- wrk->no_connect_loops = 0;
- }
- }
- else
- {
- if (!n_connects)
- {
- if (++wrk->no_connect_loops > 1e5)
- {
- session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }
- }
- else
- wrk->no_connect_loops = 0;
- }
-
- if (wrk->state == SESSION_WRK_POLLING)
- {
- elt = session_evt_alloc_ctrl (wrk);
- elt->evt.event_type = SESSION_CTRL_EVT_RPC;
- elt->evt.rpc_args.fp = session_mq_handle_connects_rpc;
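+ /* Connects left over; reprogram the rpc so the remainder is handled
+ * on a subsequent dispatch of the first worker */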
+ session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
u32 thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
- /* No workers, so just deal with the connect now */
- if (PREDICT_FALSE (!thread_index))
+ if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
{
- session_mq_connect_one (session_evt_ctrl_data (wrk, elt));
+ clib_warning ("Connect on wrong thread. Dropping");
return;
}
- if (PREDICT_FALSE (thread_index != 1))
+ /* If on a worker, check for events already queued towards main. Avoids
+ * reordering with other control messages that main still has to handle
+ */
+ if (thread_index)
{
- clib_warning ("Connect on wrong thread. Dropping");
- return;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+
+ /* Events pending on main, postpone to avoid reordering */
+ if (!clib_llist_is_empty (wrk->event_elts, evt_list, he))
+ {
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ return;
+ }
}
- /* Add to pending list to be handled by main thread */
+ /* Add to pending list to be handled by first worker */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
wrk->n_pending_connects += 1;
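/* Only the first pending connect programs the rpc; subsequent adds ride
 * on the request already in flight */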
if (wrk->n_pending_connects == 1)
{
- vlib_node_set_interrupt_pending (vlib_get_main_by_index (0),
- session_queue_node.index);
- session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0);
+ session_send_rpc_evt_to_thread_force (thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
/* Special handling for cut-through sessions */
if (!session_has_transport (s))
{
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
ct_session_connect_notify (s, SESSION_E_NONE);
return;
}
old_state = s->session_state;
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
if (!svm_fifo_is_empty_prod (s->rx_fifo))
app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
{
app_worker_close_notify (app_wrk, s);
- s->session_state = old_state;
+ session_set_state (s, old_state);
return;
}
}
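/* Runs on main; drains control events that workers deferred via
 * session_wrk_send_evt_to_main () */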
void
session_wrk_handle_evts_main_rpc (void *args)
{
- session_evt_elt_t *he, *elt, *next;
+ vlib_main_t *vm = vlib_get_main ();
+ clib_llist_index_t ei, next_ei;
+ session_evt_elt_t *he, *elt;
session_worker_t *fwrk;
u32 thread_index;
- ASSERT (vlib_thread_is_main_w_barrier ());
- thread_index = *(u32 *) args;
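+ /* Reached via session mq rpc rather than vlib_rpc_call_main_thread, so
+ * the barrier must be taken explicitly before walking the remote
+ * worker's event list */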
+ vlib_worker_thread_barrier_sync (vm);
+
+ thread_index = pointer_to_uword (args);
fwrk = session_main_get_worker (thread_index);
he = clib_llist_elt (fwrk->event_elts, fwrk->evts_pending_main);
- elt = clib_llist_next (fwrk->event_elts, evt_list, he);
+ ei = clib_llist_next_index (he, evt_list);
- while (elt != he)
+ while (ei != fwrk->evts_pending_main)
{
- next = clib_llist_next (fwrk->event_elts, evt_list, elt);
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ next_ei = clib_llist_next_index (elt, evt_list);
clib_llist_remove (fwrk->event_elts, evt_list, elt);
switch (elt->evt.event_type)
{
case SESSION_CTRL_EVT_ACCEPTED_REPLY:
session_mq_accepted_reply_handler (fwrk, elt);
break;
+ case SESSION_CTRL_EVT_CONNECT:
+ session_mq_connect_handler (fwrk, elt);
+ break;
default:
clib_warning ("unhandled %u", elt->evt.event_type);
ALWAYS_ASSERT (0);
break;
}
- session_evt_ctrl_data_free (fwrk, elt);
- clib_llist_put (fwrk->event_elts, elt);
- elt = next;
+
+ /* Regrab element in case pool moved */
+ elt = clib_llist_elt (fwrk->event_elts, ei);
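+ /* Handlers may relink the element, e.g., connects are moved to the
+ * first worker's pending_connects list, so free it only if consumed */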
+ if (!clib_llist_elt_is_linked (elt, evt_list))
+ {
+ session_evt_ctrl_data_free (fwrk, elt);
+ clib_llist_put (fwrk->event_elts, elt);
+ }
+ ei = next_ei;
}
+
+ vlib_worker_thread_barrier_release (vm);
}
vlib_node_registration_t session_queue_node;
vlib_set_trace_count (vm, node, n_trace);
}
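+/* Program async dma copies for the head buffer of a chain: read the fifo
+ * as segments and add one transfer per segment to the worker's open dma
+ * batch instead of copying synchronously */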
+always_inline int
+session_tx_fill_dma_transfers (session_worker_t *wrk,
+ session_tx_context_t *ctx, vlib_buffer_t *b)
+{
+ vlib_main_t *vm = wrk->vm;
+ u32 len_to_deq;
+ u8 *data0 = NULL;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+ /*
+ * Start with the first buffer in chain
+ */
+ b->error = 0;
+ b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->current_data = 0;
+ data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
+ len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf);
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+ ASSERT (n_bytes_read == len_to_deq);
+
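+ /* At most two segments, since the fifo read may wrap */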
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data0 += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+ return len_write;
+}
+
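+/* Same as above but for chain tail buffers, with destination pointer and
+ * dequeue length provided by the caller */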
+always_inline int
+session_tx_fill_dma_transfers_tail (session_worker_t *wrk,
+ session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ vlib_main_t *vm = wrk->vm;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+
+ return len_write;
+}
+
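+/* Copy dispatch: svm_fifo_peek () does a synchronous copy when dma is
+ * disabled, otherwise the copy is queued to the worker's dma batch */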
+always_inline int
+session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data0)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0);
+ else
+ n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b);
+ return n_bytes_read;
+}
+
+always_inline int
+session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data);
+ else
+ n_bytes_read =
+ session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data);
+ return n_bytes_read;
+}
+
always_inline void
-session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
+ vlib_main_t *vm = wrk->vm;
vlib_buffer_t *chain_b, *prev_b;
u32 chain_bi0, to_deq, left_from_seg;
- u16 len_to_deq, n_bytes_read;
+ int len_to_deq, n_bytes_read;
u8 *data, j;
b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
data = vlib_buffer_get_current (chain_b);
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo,
- ctx->sp.tx_offset, len_to_deq, data);
+ n_bytes_read =
+ session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data);
ctx->sp.tx_offset += n_bytes_read;
}
else
}
always_inline void
-session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
u32 len_to_deq;
u8 *data0;
int n_bytes_read;
-
/*
* Start with the first buffer in chain
*/
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset,
- len_to_deq, data0);
+ n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0);
ASSERT (n_bytes_read > 0);
/* Keep track of progress locally, transport is also supposed to
* increment it independently when pushing the header */
ASSERT (n_bytes_read > 0);
}
}
+
b->current_length = n_bytes_read;
ctx->left_to_snd -= n_bytes_read;
* Fill in the remaining buffers in the chain, if any
*/
if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd))
- session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data);
+ session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data);
}
always_inline u8
}
}
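+/* Buffers filled via dma are not ready for the next node until the copy
+ * completes, so they are parked on the current transfer slot rather than
+ * the worker's immediate pending_tx vectors */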
+always_inline void
+session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index)
+{
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ {
+ vec_add1 (wrk->pending_tx_buffers, bi);
+ vec_add1 (wrk->pending_tx_nexts, next_index);
+ }
+ else
+ {
+ session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail];
+ vec_add1 (dma_transfer->pending_tx_buffers, bi);
+ vec_add1 (dma_transfer->pending_tx_nexts, next_index);
+ }
+}
+
always_inline int
session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
vlib_node_runtime_t * node,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
- session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data);
ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1;
n_left -= 2;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_buffers, bi1);
- vec_add1 (wrk->pending_tx_nexts, next_index);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
+ session_tx_add_pending_buffer (wrk, bi1, next_index);
}
while (n_left)
{
bi0 = ctx->tx_buffers[--n_bufs];
b0 = vlib_get_buffer (vm, bi0);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
n_left -= 1;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
}
/* Ask transport to push headers */
*n_tx_packets += ctx->n_segs_per_evt;
SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue,
- ctx->s->tx_fifo->has_event, wrk->last_vlib_time);
+ ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time);
ASSERT (ctx->left_to_snd == 0);
clib_warning ("unhandled event type %d", e->event_type);
}
- SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk);
+ SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk);
/* Regrab elements in case pool moved */
elt = clib_llist_elt (wrk->event_elts, ei);
n_tx_packets = vec_len (wrk->pending_tx_buffers);
SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk);
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
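+ /* All transfer slots in flight (ring of trans_size, a power of two);
+ * skip this dispatch and retry later */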
+ if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1)))
+ return 0;
+ wrk->batch = vlib_dma_batch_new (vm, wrk->config_index);
+ }
+
/*
* Dequeue new internal mq events
*/
};
}
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
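+ /* Tag the batch with its ring slot so the buffers parked in
+ * dma_trans[trans_tail] can be matched to it on completion */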
+ if (wrk->batch_num)
+ {
+ vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail);
+ wrk->batch_num = 0;
+ wrk->trans_tail++;
+ if (wrk->trans_tail == wrk->trans_size)
+ wrk->trans_tail = 0;
+ }
+
+ vlib_dma_batch_submit (vm, wrk->batch);
+ }
+
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))