udp: refactor udp code
[vpp.git] / src / uri / vppcom.c
index c41acd5..f0bd2f8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2017 Cisco and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
@@ -68,6 +68,24 @@ typedef enum
   STATE_FAILED
 } session_state_t;
 
+typedef struct epoll_event vppcom_epoll_event_t; /* local alias for the system struct epoll_event */
+
+typedef struct /* epoll bookkeeping embedded in each session as the 'vep' member */
+{
+  u32 next_sid; /* next session index in the chain; vppcom_session_close() follows it until it reads ~0 */
+  u32 prev_sid; /* presumably the previous session index in the same chain -- TODO confirm */
+  u32 vep_idx; /* index of the vep session this session is attached to; passed as the vep argument to vppcom_epoll_ctl() on close */
+  vppcom_epoll_event_t ev; /* presumably the event registration recorded for this session -- TODO confirm */
+#define VEP_DEFAULT_ET_MASK  (EPOLLIN|EPOLLOUT)
+  u32 et_mask; /* mask of EPOLL* bits; vppcom_session_read() ORs in EPOLLIN when a read returns no data */
+} vppcom_epoll_t;
+
+typedef struct /* an IP address together with the flag saying which family it holds */
+{
+  u8 is_ip4; /* non-zero: the ip46.ip4 member is the one copied out; zero: ip46.ip6 (see vppcom_session_accept) */
+  ip46_address_t ip46; /* address storage shared by both families */
+} vppcom_ip46_t;
+
 typedef struct
 {
   volatile session_state_t state;
@@ -76,16 +94,21 @@ typedef struct
   svm_fifo_t *server_tx_fifo;
   u32 sm_seg_index;
   u64 vpp_session_handle;
-  unix_shared_memory_queue_t *event_queue;
+  unix_shared_memory_queue_t *vpp_event_queue;
 
   /* Socket configuration state */
+  /* TBD: convert 'is_*' vars to bits in u8 flags; */
   u8 is_server;
   u8 is_listen;
   u8 is_cut_thru;
   u8 is_nonblocking;
+  u8 is_vep;
+  u8 is_vep_session;
+  u32 wait_cont_idx;
+  vppcom_epoll_t vep;
   u32 vrf;
-  u8 is_ip4;
-  u8 ip[16];
+  vppcom_ip46_t lcl_addr;
+  vppcom_ip46_t peer_addr;
   u16 port;
   u8 proto;
   u64 client_queue_address;
@@ -113,7 +136,6 @@ typedef struct vppcom_main_t_
   u8 init;
   u32 *client_session_index_fifo;
   volatile u32 bind_session_index;
-  u32 tx_event_id;
   int main_cpu;
 
   /* vpe input queue */
@@ -238,7 +260,7 @@ vppcom_session_at_index (u32 session_index, session_t * volatile *sess)
   if (PREDICT_FALSE ((session_index == ~0) ||
                     pool_is_free_index (vcm->sessions, session_index)))
     {
-      clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+      clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                    vcm->my_pid, session_index);
       return VPPCOM_EBADFD;
     }
@@ -439,7 +461,8 @@ vppcom_app_send_attach (void)
   bmp->client_index = vcm->my_client_index;
   bmp->context = htonl (0xfeedface);
   bmp->options[APP_OPTIONS_FLAGS] =
-    APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+    APP_OPTIONS_FLAGS_ACCEPT_REDIRECT | APP_OPTIONS_FLAGS_ADD_SEGMENT |
+    APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE | APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
   bmp->options[SESSION_OPTIONS_SEGMENT_SIZE] = vcm->cfg.segment_size;
   bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = vcm->cfg.add_segment_size;
   bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = vcm->cfg.rx_fifo_size;
@@ -554,7 +577,7 @@ vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
       if (PREDICT_FALSE (rv))
        {
          if (VPPCOM_DEBUG > 1)
-           clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+           clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                          vcm->my_pid, p[0]);
        }
       hash_unset (vcm->session_index_by_vpp_handles, mp->handle);
@@ -616,7 +639,7 @@ vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
       if (PREDICT_FALSE (rval))
        {
          if (VPPCOM_DEBUG > 1)
-           clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+           clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                          vcm->my_pid, p[0]);
        }
       else
@@ -658,7 +681,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
       if (PREDICT_FALSE (rval))
        {
          if (VPPCOM_DEBUG > 1)
-           clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+           clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                          vcm->my_pid, p[0]);
        }
       else
@@ -682,7 +705,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
 }
 
 static void
-vl_api_connect_sock_reply_t_handler (vl_api_connect_sock_reply_t * mp)
+vl_api_connect_session_reply_t_handler (vl_api_connect_session_reply_t * mp)
 {
   vppcom_main_t *vcm = &vppcom_main;
   session_t *session;
@@ -698,9 +721,9 @@ vl_api_connect_sock_reply_t_handler (vl_api_connect_sock_reply_t * mp)
       return;
     }
 
-  session_index = ntohl (mp->app_connect);
+  session_index = mp->context;
   if (VPPCOM_DEBUG > 1)
-    clib_warning ("[%d] app_connect = %d 0x%08x", vcm->my_pid,
+    clib_warning ("[%d] session_index = %d 0x%08x", vcm->my_pid,
                  session_index, session_index);
 
   clib_spinlock_lock (&vcm->sessions_lockp);
@@ -729,6 +752,7 @@ vl_api_connect_sock_reply_t_handler (vl_api_connect_sock_reply_t * mp)
       vec_reset_length (a->new_segment_indices);
       if (PREDICT_FALSE (rv))
        {
+         clib_spinlock_unlock (&vcm->sessions_lockp);
          clib_warning ("[%d] sm_fifo_segment_attach ('%s') failed",
                        vcm->my_pid, a->segment_name);
          return;
@@ -743,8 +767,8 @@ vl_api_connect_sock_reply_t_handler (vl_api_connect_sock_reply_t * mp)
 
   session = pool_elt_at_index (vcm->sessions, session_index);
   session->is_cut_thru = is_cut_thru;
-  session->event_queue = uword_to_pointer (mp->vpp_event_queue_address,
-                                          unix_shared_memory_queue_t *);
+  session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address,
+                                              unix_shared_memory_queue_t *);
 
   rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
   rx_fifo->client_session_index = session_index;
@@ -773,17 +797,15 @@ vppcom_send_connect_sock (session_t * session, u32 session_index)
   memset (cmp, 0, sizeof (*cmp));
   cmp->_vl_msg_id = ntohs (VL_API_CONNECT_SOCK);
   cmp->client_index = vcm->my_client_index;
-  cmp->context = htonl (0xfeedface);
-  cmp->app_connect = session_index;
+  cmp->context = session_index;
 
   if (VPPCOM_DEBUG > 1)
-    clib_warning ("[%d] session_index = %d 0x%08x, app_connect = %d 0x%08x",
-                 vcm->my_pid, session_index, session_index,
-                 cmp->app_connect, cmp->app_connect);
+    clib_warning ("[%d] session_index = %d 0x%08x",
+                 vcm->my_pid, session_index, session_index);
 
   cmp->vrf = session->vrf;
-  cmp->is_ip4 = session->is_ip4;
-  clib_memcpy (cmp->ip, session->ip, sizeof (cmp->ip));
+  cmp->is_ip4 = session->peer_addr.is_ip4;
+  clib_memcpy (cmp->ip, &session->peer_addr.ip46, sizeof (cmp->ip));
   cmp->port = session->port;
   cmp->proto = session->proto;
   clib_memcpy (cmp->options, session->options, sizeof (cmp->options));
@@ -804,7 +826,7 @@ vppcom_send_disconnect (u32 session_index)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 1)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return rv;
     }
@@ -854,19 +876,15 @@ vl_api_unbind_sock_reply_t_handler (vl_api_unbind_sock_reply_t * mp)
 
   clib_spinlock_lock (&vcm->sessions_lockp);
   rv = vppcom_session_at_index (vcm->bind_session_index, &session);
-  if (PREDICT_FALSE (rv))
+  if (rv == VPPCOM_OK)
     {
-      if (VPPCOM_DEBUG > 1)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
-                     vcm->my_pid, vcm->bind_session_index);
-    }
-
-  if (mp->retval)
-    clib_warning ("[%d] unbind failed: %U", vcm->my_pid, format_api_error,
-                 ntohl (mp->retval));
+      if ((VPPCOM_DEBUG > 1) && (mp->retval))
+       clib_warning ("[%d] unbind failed: %U", vcm->my_pid, format_api_error,
+                     ntohl (mp->retval));
 
-  vcm->bind_session_index = ~0;
-  session->state = STATE_START;
+      vcm->bind_session_index = ~0;
+      session->state = STATE_START;
+    }
   clib_spinlock_unlock (&vcm->sessions_lockp);
 }
 
@@ -991,19 +1009,21 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp)
 
   session->server_rx_fifo = rx_fifo;
   session->server_tx_fifo = tx_fifo;
-  session->event_queue = uword_to_pointer (mp->vpp_event_queue_address,
-                                          unix_shared_memory_queue_t *);
+  session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address,
+                                              unix_shared_memory_queue_t *);
   session->state = STATE_ACCEPT;
   session->is_cut_thru = 0;
+  session->is_server = 1;
   session->port = ntohs (mp->port);
-  session->is_ip4 = mp->is_ip4;
-  clib_memcpy (session->ip, mp->ip, sizeof (session->ip));
-  clib_spinlock_unlock (&vcm->sessions_lockp);
+  session->peer_addr.is_ip4 = mp->is_ip4;
+  clib_memcpy (&session->peer_addr.ip46, mp->ip,
+              sizeof (session->peer_addr.ip46));
 
   /* Add it to lookup table */
   hash_set (vcm->session_index_by_vpp_handles, mp->handle, session_index);
 
   clib_fifo_add1 (vcm->client_session_index_fifo, session_index);
+  clib_spinlock_unlock (&vcm->sessions_lockp);
 
   /*
    * Send accept reply to vpp
@@ -1029,18 +1049,20 @@ vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
   u32 session_index;
   svm_fifo_segment_private_t *seg;
   unix_shared_memory_queue_t *client_q;
-  vl_api_connect_sock_reply_t *rmp;
+  vl_api_connect_session_reply_t *rmp;
   session_t *session = 0;
   int rv = 0;
   svm_fifo_t *rx_fifo;
   svm_fifo_t *tx_fifo;
   unix_shared_memory_queue_t *event_q = 0;
 
+  clib_spinlock_lock (&vcm->sessions_lockp);
   if (!clib_fifo_free_elts (vcm->client_session_index_fifo))
     {
       if (VPPCOM_DEBUG > 1)
        clib_warning ("[%d] client session queue is full!", vcm->my_pid);
       rv = VNET_API_ERROR_QUEUE_FULL;
+      clib_spinlock_unlock (&vcm->sessions_lockp);
       goto send_reply;
     }
 
@@ -1068,7 +1090,6 @@ vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
   if (VPPCOM_DEBUG > 1)
     clib_warning ("[%d] created segment '%s'", vcm->my_pid, a->segment_name);
 
-  clib_spinlock_lock (&vcm->sessions_lockp);
   pool_get (vcm->sessions, session);
   memset (session, 0, sizeof (*session));
   session_index = session - vcm->sessions;
@@ -1111,37 +1132,37 @@ vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
   session->client_queue_address = mp->client_queue_address;
   session->is_cut_thru = 1;
   session->is_server = 1;
-  session->is_ip4 = mp->is_ip4;
   session->port = mp->port;
+  session->peer_addr.is_ip4 = mp->is_ip4;
+  clib_memcpy (&session->peer_addr.ip46, mp->ip,
+              sizeof (session->peer_addr.ip46));
   {
     void *oldheap;
     ssvm_shared_header_t *sh = seg->ssvm.sh;
 
     ssvm_lock_non_recursive (sh, 1);
     oldheap = ssvm_push_heap (sh);
-    event_q = session->event_queue =
+    event_q = session->vpp_event_queue =
       unix_shared_memory_queue_init (vcm->cfg.event_queue_size,
                                     sizeof (session_fifo_event_t),
                                     vcm->my_pid, 0 /* signal not sent */ );
     ssvm_pop_heap (oldheap);
     ssvm_unlock_non_recursive (sh);
   }
-  clib_memcpy (session->ip, mp->ip, sizeof (session->ip));
 
   session->state = STATE_ACCEPT;
   if (VPPCOM_DEBUG > 1)
     clib_warning ("[%d] Connected cut-thru to client: sid %d",
                  vcm->my_pid, session_index);
-  clib_spinlock_unlock (&vcm->sessions_lockp);
   clib_fifo_add1 (vcm->client_session_index_fifo, session_index);
+  clib_spinlock_unlock (&vcm->sessions_lockp);
 
 send_reply:
   rmp = vl_msg_api_alloc (sizeof (*rmp));
   memset (rmp, 0, sizeof (*rmp));
 
-  rmp->_vl_msg_id = ntohs (VL_API_CONNECT_SOCK_REPLY);
+  rmp->_vl_msg_id = ntohs (VL_API_CONNECT_SESSION_REPLY);
   rmp->context = mp->context;
-  rmp->app_connect = htonl (mp->app_connect);
   rmp->retval = htonl (rv);
   rmp->segment_name_length = vec_len (a->segment_name);
   clib_memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name));
@@ -1175,8 +1196,8 @@ vppcom_send_bind_sock (session_t * session)
   bmp->client_index = vcm->my_client_index;
   bmp->context = htonl (0xfeedface);
   bmp->vrf = session->vrf;
-  bmp->is_ip4 = session->is_ip4;
-  clib_memcpy (bmp->ip, session->ip, sizeof (bmp->ip));
+  bmp->is_ip4 = session->lcl_addr.is_ip4;
+  clib_memcpy (bmp->ip, &session->lcl_addr.ip46, sizeof (bmp->ip));
   bmp->port = session->port;
   bmp->proto = session->proto;
   clib_memcpy (bmp->options, session->options, sizeof (bmp->options));
@@ -1197,7 +1218,7 @@ vppcom_send_unbind_sock (u32 session_index)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return;
     }
@@ -1240,7 +1261,7 @@ vppcom_session_unbind (u32 session_index)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 1)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return VPPCOM_EBADFD;
     }
@@ -1280,10 +1301,6 @@ vppcom_session_disconnect (u32 session_index)
                      vcm->my_pid, vppcom_retval_str (rv), rv);
       return rv;
     }
-
-  clib_spinlock_lock (&vcm->sessions_lockp);
-  pool_put_index (vcm->sessions, session_index);
-  clib_spinlock_unlock (&vcm->sessions_lockp);
   return VPPCOM_OK;
 }
 
@@ -1293,7 +1310,7 @@ _(BIND_SOCK_REPLY, bind_sock_reply)                             \
 _(UNBIND_SOCK_REPLY, unbind_sock_reply)                         \
 _(ACCEPT_SESSION, accept_session)                               \
 _(CONNECT_SOCK, connect_sock)                                   \
-_(CONNECT_SOCK_REPLY, connect_sock_reply)                       \
+_(CONNECT_SESSION_REPLY, connect_session_reply)                 \
 _(DISCONNECT_SESSION, disconnect_session)                       \
 _(DISCONNECT_SESSION_REPLY, disconnect_session_reply)           \
 _(RESET_SESSION, reset_session)                                 \
@@ -1372,23 +1389,18 @@ vppcom_cfg_heapsize (char *conf_fname)
          argc++;
          char **tmp = realloc (argv, argc * sizeof (char *));
          if (tmp == NULL)
-           {
-             fclose (fp);
-             goto defaulted;
-           }
+           goto defaulted;
          argv = tmp;
          arg = strndup (p, 1024);
          if (arg == NULL)
-           {
-             fclose (fp);
-             goto defaulted;
-           }
+           goto defaulted;
          argv[argc - 1] = arg;
          p = strtok (NULL, " \t\n");
        }
     }
 
   fclose (fp);
+  fp = NULL;
 
   char **tmp = realloc (argv, (argc + 1) * sizeof (char *));
   if (tmp == NULL)
@@ -1441,6 +1453,10 @@ vppcom_cfg_heapsize (char *conf_fname)
     }
 
 defaulted:
+  if (fp != NULL)
+    fclose (fp);
+  if (argv != NULL)
+    free (argv);
   if (!clib_mem_init (0, vcl_cfg->heapsize))
     clib_warning ("[%d] vppcom heap allocation failure!", vcm->my_pid);
   else if (VPPCOM_DEBUG > 0)
@@ -1485,11 +1501,11 @@ vppcom_cfg_read (char *conf_fname)
       goto file_done;
     }
 
-  unformat_init_unix_file (input, fd);
+  unformat_init_clib_file (input, fd);
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
-      unformat_user (input, unformat_line_input, line_input);
+      (void) unformat_user (input, unformat_line_input, line_input);
       unformat_skip_white_space (line_input);
 
       if (unformat (line_input, "vppcom {"))
@@ -1690,7 +1706,7 @@ input_done:
   unformat_free (input);
 
 file_done:
-  if (fd > 0)
+  if (fd >= 0)
     close (fd);
 }
 
@@ -1749,7 +1765,7 @@ vppcom_app_create (char *app_name)
       rv = vppcom_connect_to_vpp (app_name);
       if (rv)
        {
-         clib_warning ("[%s] couldn't connect to VPP.", vcm->my_pid);
+         clib_warning ("[%d] couldn't connect to VPP.", vcm->my_pid);
          return rv;
        }
 
@@ -1818,6 +1834,7 @@ vppcom_session_create (u32 vrf, u8 proto, u8 is_nonblocking)
 
   clib_spinlock_lock (&vcm->sessions_lockp);
   pool_get (vcm->sessions, session);
+  memset (session, 0, sizeof (*session));
   session_index = session - vcm->sessions;
 
   session->vrf = vrf;
@@ -1843,32 +1860,101 @@ vppcom_session_close (uint32_t session_index)
   rv = vppcom_session_at_index (session_index, &session);
   if (PREDICT_FALSE (rv))
     {
-      clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
-      return rv;
+      clib_spinlock_unlock (&vcm->sessions_lockp);
+      goto done;
     }
   clib_spinlock_unlock (&vcm->sessions_lockp);
 
   if (VPPCOM_DEBUG > 0)
     clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
 
-  if (session->is_cut_thru)
+  if (session->is_vep)
     {
-      if (session->is_server)
-       rv = vppcom_session_unbind_cut_thru (session);
+      u32 next_sid;
+      for (next_sid = session->vep.next_sid; next_sid != ~0;
+          next_sid = session->vep.next_sid)
+       {
+         rv = vppcom_epoll_ctl (session_index, EPOLL_CTL_DEL, next_sid, 0);
+         if ((VPPCOM_DEBUG > 0) && (rv < 0))
+           clib_warning ("[%d] EPOLL_CTL_DEL vep_idx %u, sid %u failed, "
+                         "rv = %s (%d)", session_index, next_sid,
+                         vcm->my_pid, session_index,
+                         vppcom_retval_str (rv), rv);
+
+         clib_spinlock_lock (&vcm->sessions_lockp);
+         rv = vppcom_session_at_index (session_index, &session);
+         if (PREDICT_FALSE (rv))
+           {
+             if (VPPCOM_DEBUG > 0)
+               clib_warning
+                 ("[%d] invalid session, sid (%u) has been closed!",
+                  vcm->my_pid, session_index);
+             clib_spinlock_unlock (&vcm->sessions_lockp);
+             goto done;
+           }
+         clib_spinlock_unlock (&vcm->sessions_lockp);
+       }
     }
   else
     {
-      rv = (session->is_server) ?
-       vppcom_session_unbind (session_index) :
-       vppcom_session_disconnect (session_index);
-    }
+      if (session->is_vep_session)
+       {
+         u32 vep_idx = session->vep.vep_idx;
+         rv = vppcom_epoll_ctl (vep_idx, EPOLL_CTL_DEL, session_index, 0);
+         if ((VPPCOM_DEBUG > 0) && (rv < 0))
+           clib_warning ("[%d] EPOLL_CTL_DEL vep_idx %u, sid %u failed, "
+                         "rv = %s (%d)", vep_idx, session_index,
+                         vcm->my_pid, session_index,
+                         vppcom_retval_str (rv), rv);
 
-  clib_spinlock_lock (&vcm->sessions_lockp);
+         clib_spinlock_lock (&vcm->sessions_lockp);
+         rv = vppcom_session_at_index (session_index, &session);
+         if (PREDICT_FALSE (rv))
+           {
+             if (VPPCOM_DEBUG > 0)
+               clib_warning
+                 ("[%d] invalid session, sid (%u) has been closed!",
+                  vcm->my_pid, session_index);
+             clib_spinlock_unlock (&vcm->sessions_lockp);
+             goto done;
+           }
+         clib_spinlock_unlock (&vcm->sessions_lockp);
+       }
+
+      if (session->is_cut_thru)
+       {
+         if (session->is_server)
+           {
+             rv = vppcom_session_unbind_cut_thru (session);
+             if ((VPPCOM_DEBUG > 0) && (rv < 0))
+               clib_warning ("[%d] unbind cut-thru (session %d) failed, "
+                             "rv = %s (%d)",
+                             vcm->my_pid, session_index,
+                             vppcom_retval_str (rv), rv);
+           }
+       }
+      else if (session->is_server)
+       {
+         rv = vppcom_session_unbind (session_index);
+         if ((VPPCOM_DEBUG > 0) && (rv < 0))
+           clib_warning ("[%d] unbind (session %d) failed, rv = %s (%d)",
+                         vcm->my_pid, session_index,
+                         vppcom_retval_str (rv), rv);
+       }
+      else
+       {
+         rv = vppcom_session_disconnect (session_index);
+         if ((VPPCOM_DEBUG > 0) && (rv < 0))
+           clib_warning ("[%d] disconnect (session %d) failed, rv = %s (%d)",
+                         vcm->my_pid, session_index,
+                         vppcom_retval_str (rv), rv);
+       }
+    }
   pool_put_index (vcm->sessions, session_index);
-  clib_spinlock_unlock (&vcm->sessions_lockp);
+done:
   return rv;
 }
 
@@ -1888,7 +1974,7 @@ vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return rv;
     }
@@ -1897,9 +1983,8 @@ vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep)
     clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
 
   session->vrf = ep->vrf;
-  session->is_ip4 = ep->is_ip4;
-  memset (session->ip, 0, sizeof (*session->ip));
-  clib_memcpy (session->ip, ep->ip, sizeof (session->ip));
+  session->lcl_addr.is_ip4 = ep->is_ip4;
+  session->lcl_addr.ip46 = to_ip46 (!ep->is_ip4, ep->ip);
   session->port = ep->port;
 
   clib_spinlock_unlock (&vcm->sessions_lockp);
@@ -1907,32 +1992,33 @@ vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep)
 }
 
 int
-vppcom_session_listen (uint32_t session_index, uint32_t q_len)
+vppcom_session_listen (uint32_t listen_session_index, uint32_t q_len)
 {
   vppcom_main_t *vcm = &vppcom_main;
-  session_t *session = 0;
+  session_t *listen_session = 0;
   int rv;
 
   clib_spinlock_lock (&vcm->sessions_lockp);
-  rv = vppcom_session_at_index (session_index, &session);
+  rv = vppcom_session_at_index (listen_session_index, &listen_session);
   if (PREDICT_FALSE (rv))
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
-                     vcm->my_pid, session_index);
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
+                     vcm->my_pid, listen_session_index);
       return rv;
     }
 
   if (VPPCOM_DEBUG > 0)
-    clib_warning ("[%d] sid %d", vcm->my_pid, session_index);
+    clib_warning ("[%d] sid %d", vcm->my_pid, listen_session_index);
 
   ASSERT (vcm->bind_session_index == ~0);
-  vcm->bind_session_index = session_index;
-  vppcom_send_bind_sock (session);
+  vcm->bind_session_index = listen_session_index;
+  vppcom_send_bind_sock (listen_session);
   clib_spinlock_unlock (&vcm->sessions_lockp);
-  rv = vppcom_wait_for_session_state_change (session_index, STATE_LISTEN,
-                                            vcm->cfg.session_timeout);
+  rv =
+    vppcom_wait_for_session_state_change (listen_session_index, STATE_LISTEN,
+                                         vcm->cfg.session_timeout);
   if (PREDICT_FALSE (rv))
     {
       vcm->bind_session_index = ~0;
@@ -1943,18 +2029,18 @@ vppcom_session_listen (uint32_t session_index, uint32_t q_len)
     }
 
   clib_spinlock_lock (&vcm->sessions_lockp);
-  rv = vppcom_session_at_index (session_index, &session);
+  rv = vppcom_session_at_index (listen_session_index, &listen_session);
   if (PREDICT_FALSE (rv))
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
-                     vcm->my_pid, session_index);
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
+                     vcm->my_pid, listen_session_index);
       return rv;
     }
-  session->is_listen = 1;
-  clib_spinlock_unlock (&vcm->sessions_lockp);
+  listen_session->is_listen = 1;
   clib_fifo_validate (vcm->client_session_index_fifo, q_len);
+  clib_spinlock_unlock (&vcm->sessions_lockp);
 
   return VPPCOM_OK;
 }
@@ -1964,37 +2050,40 @@ vppcom_session_accept (uint32_t listen_session_index, vppcom_endpt_t * ep,
                       double wait_for_time)
 {
   vppcom_main_t *vcm = &vppcom_main;
-  session_t *session = 0;
+  session_t *listen_session = 0;
+  session_t *client_session = 0;
   u32 client_session_index;
   int rv;
   f64 wait_for;
 
   clib_spinlock_lock (&vcm->sessions_lockp);
-  rv = vppcom_session_at_index (listen_session_index, &session);
+  rv = vppcom_session_at_index (listen_session_index, &listen_session);
   if (PREDICT_FALSE (rv))
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, listen_session_index);
       return rv;
     }
 
-  if (session->state != STATE_LISTEN)
+  if (listen_session->state != STATE_LISTEN)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
        clib_warning ("[%d] session not in listen state, state = %s",
-                     vcm->my_pid, vppcom_session_state_str (session->state));
+                     vcm->my_pid,
+                     vppcom_session_state_str (listen_session->state));
       return VPPCOM_EBADFD;
     }
-  wait_for = session->is_nonblocking ? 0 :
+  wait_for = listen_session->is_nonblocking ? 0 :
     (wait_for_time < 0) ? vcm->cfg.accept_timeout : wait_for_time;
 
   if (VPPCOM_DEBUG > 0)
-    clib_warning ("[%d] sid %d, state %s (%d)", vcm->my_pid,
+    clib_warning ("[%d] sid %d: %s (%d)", vcm->my_pid,
                  listen_session_index,
-                 vppcom_session_state_str (session->state), session->state);
+                 vppcom_session_state_str (listen_session->state),
+                 listen_session->state);
   clib_spinlock_unlock (&vcm->sessions_lockp);
 
   while (1)
@@ -2013,25 +2102,27 @@ vppcom_session_accept (uint32_t listen_session_index, vppcom_endpt_t * ep,
        break;
     }
 
-  clib_fifo_sub1 (vcm->client_session_index_fifo, client_session_index);
-
-  session = 0;
   clib_spinlock_lock (&vcm->sessions_lockp);
-  rv = vppcom_session_at_index (client_session_index, &session);
+  clib_fifo_sub1 (vcm->client_session_index_fifo, client_session_index);
+  rv = vppcom_session_at_index (client_session_index, &client_session);
   ASSERT (rv == VPPCOM_OK);
-  ASSERT (session->is_server);
+  ASSERT (client_session->peer_addr.is_ip4 ==
+         listen_session->lcl_addr.is_ip4);
 
   if (VPPCOM_DEBUG > 0)
     clib_warning ("[%d] Got a request: client sid %d", vcm->my_pid,
                  client_session_index);
 
-  ep->vrf = session->vrf;
-  ep->is_cut_thru = session->is_cut_thru;
-  ep->is_ip4 = session->is_ip4;
-  ep->port = session->port;
-  memset (ep->ip, 0, sizeof (ip6_address_t));
-  clib_memcpy (ep->ip, session->ip, sizeof (ip6_address_t));
-  session->state = STATE_LISTEN;
+  ep->vrf = client_session->vrf;
+  ep->is_cut_thru = client_session->is_cut_thru;
+  ep->is_ip4 = client_session->peer_addr.is_ip4;
+  ep->port = client_session->port;
+  if (client_session->peer_addr.is_ip4)
+    clib_memcpy (ep->ip, &client_session->peer_addr.ip46.ip4,
+                sizeof (ip4_address_t));
+  else
+    clib_memcpy (ep->ip, &client_session->peer_addr.ip46.ip6,
+                sizeof (ip6_address_t));
   clib_spinlock_unlock (&vcm->sessions_lockp);
   return (int) client_session_index;
 }
@@ -2042,7 +2133,6 @@ vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep)
   vppcom_main_t *vcm = &vppcom_main;
   session_t *session = 0;
   int rv;
-  ip46_address_t *ip46;
 
   clib_spinlock_lock (&vcm->sessions_lockp);
   rv = vppcom_session_at_index (session_index, &session);
@@ -2050,7 +2140,7 @@ vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return rv;
     }
@@ -2059,21 +2149,21 @@ vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] session, sid (%d) already connected!",
+       clib_warning ("[%d] session, sid (%u) already connected!",
                      vcm->my_pid, session_index);
       return VPPCOM_OK;
     }
 
   session->vrf = server_ep->vrf;
-  session->is_ip4 = server_ep->is_ip4;
-  ip46 = (ip46_address_t *) session->ip;
-  *ip46 = to_ip46 (!server_ep->is_ip4, server_ep->ip);
+  session->peer_addr.is_ip4 = server_ep->is_ip4;
+  session->peer_addr.ip46 = to_ip46 (!server_ep->is_ip4, server_ep->ip);
   session->port = server_ep->port;
 
   if (VPPCOM_DEBUG > 0)
     {
       u8 *ip_str = format (0, "%U", format_ip46_address,
-                          &session->ip, session->is_ip4);
+                          &session->peer_addr.ip46,
+                          session->peer_addr.is_ip4);
       clib_warning ("[%d] connect sid %d to %s server port %d",
                    vcm->my_pid, session_index, ip_str,
                    clib_net_to_host_u16 (session->port));
@@ -2097,7 +2187,6 @@ vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep)
 int
 vppcom_session_read (uint32_t session_index, void *buf, int n)
 {
-  session_fifo_event_t _e, *e = &_e;
   vppcom_main_t *vcm = &vppcom_main;
   session_t *session = 0;
   svm_fifo_t *rx_fifo;
@@ -2113,104 +2202,76 @@ vppcom_session_read (uint32_t session_index, void *buf, int n)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return rv;
     }
 
-  if (session->is_cut_thru)
+  if (session->state == STATE_DISCONNECT)
     {
-      rx_fifo = session->is_server ? session->server_rx_fifo :
-       session->server_tx_fifo;
-      fifo_str = session->is_server ? "server_rx_fifo" : "server_tx_fifo";
       clib_spinlock_unlock (&vcm->sessions_lockp);
+      if (VPPCOM_DEBUG > 0)
+       clib_warning ("[%d] sid (%u) has been closed by remote peer!",
+                     vcm->my_pid, session_index);
+      return VPPCOM_ECONNRESET;
+    }
 
-      n_read = svm_fifo_dequeue_nowait (rx_fifo, n, buf);
-
-      if (n_read <= 0)
-       return VPPCOM_EAGAIN;
+  rx_fifo = ((!session->is_cut_thru || session->is_server) ?
+            session->server_rx_fifo : session->server_tx_fifo);
+  fifo_str = ((!session->is_cut_thru || session->is_server) ?
+             "server_rx_fifo" : "server_tx_fifo");
 
-    }
-  else
+  do
     {
-      rv = unix_shared_memory_queue_sub (session->event_queue, (u8 *) e,
-                                        1 /* nowait */ );
-      clib_spinlock_unlock (&vcm->sessions_lockp);
-      if (rv < 0)
-       return VPPCOM_EAGAIN;
-
-      switch (e->event_type)
-       {
-       case FIFO_EVENT_APP_RX:
-         rx_fifo = e->fifo;
-         fifo_str = "app_rx_fifo";
-         n_read = svm_fifo_dequeue_nowait (rx_fifo, n, buf);
-         break;
+      n_read = svm_fifo_dequeue_nowait (rx_fifo, n, buf);
+    }
+  while (!session->is_nonblocking && (n_read <= 0));
 
-       case FIFO_EVENT_DISCONNECT:
-         return VPPCOM_ECONNRESET;
+  if (n_read <= 0)
+    session->vep.et_mask |= EPOLLIN;
 
-       default:
-         if (VPPCOM_DEBUG > 0)
-           clib_warning ("[%d] unknown event type %d", vcm->my_pid,
-                         e->event_type);
-         return VPPCOM_EAGAIN;
-       }
-    }
+  clib_spinlock_unlock (&vcm->sessions_lockp);
 
-  if (VPPCOM_DEBUG > 2)
+  if ((VPPCOM_DEBUG > 2) && (n_read > 0))
     clib_warning ("[%d] sid %d, read %d bytes from %s (%p)", vcm->my_pid,
                  session_index, n_read, fifo_str, rx_fifo);
-  return n_read;
+
+  return (n_read <= 0) ? VPPCOM_EAGAIN : n_read;
 }
 
 static inline int
 vppcom_session_read_ready (session_t * session, u32 session_index)
 {
-  session_fifo_event_t _e, *e = &_e;
   vppcom_main_t *vcm = &vppcom_main;
   svm_fifo_t *rx_fifo;
-  int rv;
   int ready = 0;
 
   /* Assumes caller has acquired spinlock: vcm->sessions_lockp */
-  if (session->is_cut_thru)
+  if (session->state == STATE_DISCONNECT)
     {
-      rx_fifo = session->is_server ? session->server_rx_fifo :
-       session->server_tx_fifo;
-
-      ready = svm_fifo_max_dequeue (rx_fifo);
+      if (VPPCOM_DEBUG > 0)
+       clib_warning ("[%d] sid (%u) has been closed by remote peer!",
+                     vcm->my_pid, session_index);
+      return VPPCOM_ECONNRESET;
     }
-  else if (session->is_listen)
+
+  if (session->is_listen)
     ready = clib_fifo_elts (vcm->client_session_index_fifo);
   else
     {
-      rv = unix_shared_memory_queue_sub (vcm->app_event_queue, (u8 *) e,
-                                        1 /* nowait */ );
-      if (rv >= 0)
-       {
-         switch (e->event_type)
-           {
-           case FIFO_EVENT_APP_RX:
-             rx_fifo = e->fifo;
-             ready = svm_fifo_max_dequeue (rx_fifo);
-             break;
-
-           case FIFO_EVENT_DISCONNECT:
-             return VPPCOM_ECONNRESET;
+      rx_fifo = ((!session->is_cut_thru || session->is_server) ?
+                session->server_rx_fifo : session->server_tx_fifo);
 
-           default:
-             clib_warning ("[%d] unknown event type %d", vcm->my_pid,
-                           e->event_type);
-           }
-       }
+      ready = svm_fifo_max_dequeue (rx_fifo);
     }
 
-  if (VPPCOM_DEBUG > 2)
+  if (VPPCOM_DEBUG > 3)
     clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid,
                  session_index,
                  session->is_server ? "server_rx_fifo" : "server_tx_fifo",
                  rx_fifo, ready);
+  if (ready == 0)
+    session->vep.et_mask |= EPOLLIN;
 
   return ready;
 }
@@ -2223,9 +2284,8 @@ vppcom_session_write (uint32_t session_index, void *buf, int n)
   svm_fifo_t *tx_fifo;
   unix_shared_memory_queue_t *q;
   session_fifo_event_t evt;
-  int rv;
+  int rv, n_write;
   char *fifo_str;
-  u8 is_nonblocking;
 
   ASSERT (buf);
 
@@ -2235,56 +2295,63 @@ vppcom_session_write (uint32_t session_index, void *buf, int n)
     {
       clib_spinlock_unlock (&vcm->sessions_lockp);
       if (VPPCOM_DEBUG > 0)
-       clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+       clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                      vcm->my_pid, session_index);
       return rv;
     }
 
+  if (session->state == STATE_DISCONNECT)
+    {
+      clib_spinlock_unlock (&vcm->sessions_lockp);
+      if (VPPCOM_DEBUG > 0)
+       clib_warning ("[%d] sid (%u) has been closed by remote peer!",
+                     vcm->my_pid, session_index);
+      return VPPCOM_ECONNRESET;
+    }
+
   tx_fifo = ((!session->is_cut_thru || session->is_server) ?
             session->server_tx_fifo : session->server_rx_fifo);
   fifo_str = ((!session->is_cut_thru || session->is_server) ?
              "server_tx_fifo" : "server_rx_fifo");
-  is_nonblocking = session->is_nonblocking;
-  clib_spinlock_unlock (&vcm->sessions_lockp);
-
   do
     {
-      rv = svm_fifo_enqueue_nowait (tx_fifo, n, buf);
+      n_write = svm_fifo_enqueue_nowait (tx_fifo, n, buf);
     }
-  while (!is_nonblocking && (rv <= 0));
+  while (!session->is_nonblocking && (n_write <= 0));
 
   /* If event wasn't set, add one */
-  if ((rv > 0) && svm_fifo_set_event (tx_fifo))
+  if (!session->is_cut_thru && (n_write > 0) && svm_fifo_set_event (tx_fifo))
     {
       int rval;
 
       /* Fabricate TX event, send to vpp */
       evt.fifo = tx_fifo;
       evt.event_type = FIFO_EVENT_APP_TX;
-      evt.event_id = vcm->tx_event_id++;
 
-      clib_spinlock_lock (&vcm->sessions_lockp);
       rval = vppcom_session_at_index (session_index, &session);
       if (PREDICT_FALSE (rval))
        {
          clib_spinlock_unlock (&vcm->sessions_lockp);
          if (VPPCOM_DEBUG > 1)
-           clib_warning ("[%d] invalid session, sid (%d) has been closed!",
+           clib_warning ("[%d] invalid session, sid (%u) has been closed!",
                          vcm->my_pid, session_index);
          return rval;
        }
-      q = session->event_queue;
-      clib_spinlock_unlock (&vcm->sessions_lockp);
+      q = session->vpp_event_queue;
       ASSERT (q);
       unix_shared_memory_queue_add (q, (u8 *) & evt,
                                    0 /* do wait for mutex */ );
     }
 
+  if (n_write <= 0)
+    session->vep.et_mask |= EPOLLOUT;
+
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
   if (VPPCOM_DEBUG > 2)
     clib_warning ("[%d] sid %d, wrote %d bytes to %s (%p)", vcm->my_pid,
-                 session_index, rv, fifo_str, tx_fifo);
-
-  return rv;
+                 session_index, n_write, fifo_str, tx_fifo);
+  return n_write;
 }
 
 static inline int
@@ -2292,20 +2359,32 @@ vppcom_session_write_ready (session_t * session, u32 session_index)
 {
   vppcom_main_t *vcm = &vppcom_main;
   svm_fifo_t *tx_fifo;
-  int rv;
+  char *fifo_str;
+  int ready;
 
   /* Assumes caller has acquired spinlock: vcm->sessions_lockp */
+  if (session->state == STATE_DISCONNECT)
+    {
+      if (VPPCOM_DEBUG > 0)
+       clib_warning ("[%d] sid (%u) has been closed by remote peer!",
+                     vcm->my_pid, session_index);
+      return VPPCOM_ECONNRESET;
+    }
+
   tx_fifo = ((!session->is_cut_thru || session->is_server) ?
             session->server_tx_fifo : session->server_rx_fifo);
+  fifo_str = ((!session->is_cut_thru || session->is_server) ?
+             "server_tx_fifo" : "server_rx_fifo");
 
-  rv = svm_fifo_max_enqueue (tx_fifo);
+  ready = svm_fifo_max_enqueue (tx_fifo);
 
-  if (VPPCOM_DEBUG > 2)
+  if (VPPCOM_DEBUG > 3)
     clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid,
-                 session_index,
-                 session->is_server ? "server_tx_fifo" : "server_rx_fifo",
-                 tx_fifo, rv);
-  return rv;
+                 session_index, fifo_str, tx_fifo, ready);
+  if (ready == 0)
+    session->vep.et_mask |= EPOLLOUT;
+
+  return ready;
 }
 
 int
@@ -2365,12 +2444,14 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map,
               clib_bitmap_get (vcm->ex_bitmap, session_index) && (rv < 0))
             {
               // TBD: clib_warning
+              /* coverity[FORWARD_NULL] */
               clib_bitmap_set_no_check (except_map, session_index, 1);
               bits_set++;
             }
           else if (rv > 0)
             {
               // TBD: clib_warning
+              /* coverity[FORWARD_NULL] */
               clib_bitmap_set_no_check (read_map, session_index, 1);
               bits_set++;
             }
@@ -2393,9 +2474,10 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map,
 
           rv = vppcom_session_write_ready (session, session_index);
           clib_spinlock_unlock (&vcm->sessions_lockp);
-          if (rv > 0)
+          if (rv > 0 )
             {
               // TBD: clib_warning
+              /* coverity[FORWARD_NULL] */
               clib_bitmap_set_no_check (write_map, session_index, 1);
               bits_set++;
             }
@@ -2421,6 +2503,7 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map,
           if (rv < 0)
             {
               // TBD: clib_warning
+              /* coverity[FORWARD_NULL] */
               clib_bitmap_set_no_check (except_map, session_index, 1);
               bits_set++;
             }
@@ -2433,10 +2516,640 @@ select_done:
   return (bits_set);
 }
 
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+/*
+ * Debug helper: walk the epoll chain anchored at vep_idx and report
+ * structural inconsistencies through clib_warning().
+ *
+ * Returns immediately when VPPCOM_DEBUG < 1; the per-node dump is only
+ * printed when VPPCOM_DEBUG > 1.  The walk follows vep.next_sid links
+ * until ~0 and stops early when a session index cannot be resolved or
+ * a chained session is neither a vep nor a vep session.
+ *
+ * Assumes caller has acquired spinlock: vcm->sessions_lockp
+ */
+static inline void
+vep_verify_epoll_chain (u32 vep_idx)
+{
+  session_t *session;
+  vppcom_epoll_t *vep;
+  int rv;
+  /* The walk starts on the vep itself, so that is the index a dump of
+   * the first node has to report; sid must never be indeterminate. */
+  u32 sid = vep_idx;
+
+  if (VPPCOM_DEBUG < 1)
+    return;
+
+  rv = vppcom_session_at_index (vep_idx, &session);
+  if (PREDICT_FALSE (rv))
+    {
+      clib_warning ("ERROR: Invalid vep_idx (%u)!", vep_idx);
+      goto done;
+    }
+  if (PREDICT_FALSE (!session->is_vep))
+    {
+      clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx);
+      goto done;
+    }
+  if (VPPCOM_DEBUG > 1)
+    clib_warning ("vep_idx (%u): Dumping epoll chain\n"
+                  "{\n"
+                  "   is_vep         = %u\n"
+                  "   is_vep_session = %u\n"
+                  "   wait_cont_idx  = 0x%x (%u)\n"
+                  "}\n",
+                  vep_idx, session->is_vep, session->is_vep_session,
+                  session->wait_cont_idx, session->wait_cont_idx);
+  do
+    {
+      vep = &session->vep;
+      if (session->is_vep_session)
+        {
+          if (VPPCOM_DEBUG > 1)
+            clib_warning ("vep_idx[%u]: sid 0x%x (%u)\n"
+                          "{\n"
+                          "   next_sid       = 0x%x (%u)\n"
+                          "   prev_sid       = 0x%x (%u)\n"
+                          "   vep_idx        = 0x%x (%u)\n"
+                          "   ev.events      = 0x%x\n"
+                          "   ev.data.u64    = 0x%llx\n"
+                          "   et_mask        = 0x%x\n"
+                          "}\n",
+                          vep_idx, sid, sid,
+                          vep->next_sid, vep->next_sid,
+                          vep->prev_sid, vep->prev_sid,
+                          vep->vep_idx, vep->vep_idx,
+                          vep->ev.events, vep->ev.data.u64, vep->et_mask);
+        }
+      sid = vep->next_sid;
+      if (sid != ~0)
+        {
+          rv = vppcom_session_at_index (sid, &session);
+          if (PREDICT_FALSE (rv))
+            {
+              clib_warning ("ERROR: Invalid sid (%u)!", sid);
+              goto done;
+            }
+          /* Report the offending chained session, not the anchor. */
+          if (PREDICT_FALSE (session->is_vep))
+            clib_warning ("ERROR: sid (%u) is a vep!", sid);
+          else if (PREDICT_FALSE (!session->is_vep_session))
+            {
+              clib_warning ("ERROR: session (%u) is not a vep session!", sid);
+              goto done;
+            }
+          if (PREDICT_FALSE (session->vep.vep_idx != vep_idx))
+            clib_warning ("ERROR: session (%u) vep_idx (%u) != "
+                          "vep_idx (%u)!",
+                          sid, session->vep.vep_idx, vep_idx);
+        }
+    }
+  while (sid != ~0);
+
+done:
+  if (VPPCOM_DEBUG > 1)
+    clib_warning ("vep_idx (%u): Dump complete!", vep_idx);
+}
+
+/*
+ * Allocate a session from the pool and set it up as an empty epoll
+ * anchor ("vep"): is_vep is set, and the chain links, owning vep index
+ * and wait continuation index are all ~0.
+ *
+ * Returns the pool index of the new vep session.
+ */
+int
+vppcom_epoll_create (void)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep;
+  u32 idx;
+
+  clib_spinlock_lock (&vcm->sessions_lockp);
+
+  pool_get (vcm->sessions, vep);
+  idx = vep - vcm->sessions;
+
+  /* Start from an all-zero session, then mark every index as unset. */
+  memset (vep, 0, sizeof (*vep));
+  vep->wait_cont_idx = ~0;
+  vep->vep.prev_sid = ~0;
+  vep->vep.next_sid = ~0;
+  vep->vep.vep_idx = ~0;
+  vep->is_vep = 1;
+
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  if (VPPCOM_DEBUG > 0)
+    clib_warning ("Created vep_idx %u!", idx);
+
+  return (idx);
+}
+
+/*
+ * Add, modify or remove a session on the epoll chain anchored at a vep
+ * session.
+ *
+ * vep_idx        session index of the vep (must have is_vep set)
+ * op             EPOLL_CTL_ADD, EPOLL_CTL_MOD or EPOLL_CTL_DEL
+ * session_index  session to operate on (must not be a vep itself)
+ * event          events/data to store; required for ADD and MOD
+ *
+ * The chain is a doubly linked list threaded through session->vep:
+ * the vep's next_sid is the head, new sessions are inserted at the
+ * head, and the first element's prev_sid is the vep index.
+ *
+ * Returns VPPCOM_OK on success, the vppcom_session_at_index() error
+ * when an index cannot be resolved, or VPPCOM_EINVAL for any other
+ * invalid request (including adding a session that is already chained
+ * and MOD/DEL of a session that is not on this vep).
+ */
+int
+vppcom_epoll_ctl (uint32_t vep_idx, int op, uint32_t session_index,
+                  struct epoll_event *event)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep_session;
+  session_t *session;
+  int rv;
+
+  if (vep_idx == session_index)
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx == session_index (%u)!", vep_idx);
+      return VPPCOM_EINVAL;
+    }
+
+  clib_spinlock_lock (&vcm->sessions_lockp);
+  rv = vppcom_session_at_index (vep_idx, &vep_session);
+  if (PREDICT_FALSE (rv))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid vep_idx (%u)!", vep_idx);
+      goto done;
+    }
+  if (PREDICT_FALSE (!vep_session->is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+
+  ASSERT (vep_session->vep.vep_idx == ~0);
+  ASSERT (vep_session->vep.prev_sid == ~0);
+
+  rv = vppcom_session_at_index (session_index, &session);
+  if (PREDICT_FALSE (rv))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid session_index (%u)!", session_index);
+      goto done;
+    }
+  if (PREDICT_FALSE (session->is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: session_index (%u) is a vep!", session_index);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+
+  switch (op)
+    {
+    case EPOLL_CTL_ADD:
+      if (PREDICT_FALSE (!event))
+        {
+          clib_warning ("NULL pointer to epoll_event structure!");
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      /* Re-linking a session that is still on a chain would leave its
+       * old neighbours pointing at it and corrupt both lists. */
+      if (PREDICT_FALSE (session->is_vep_session))
+        {
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("EPOLL_CTL_ADD: session (%u) is already a vep "
+                          "session on vep_idx (%u)!", session_index,
+                          session->vep.vep_idx);
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      if (vep_session->vep.next_sid != ~0)
+        {
+          session_t *next_session;
+          rv = vppcom_session_at_index (vep_session->vep.next_sid,
+                                        &next_session);
+          if (PREDICT_FALSE (rv))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("EPOLL_CTL_ADD: Invalid vep.next_sid (%u) on"
+                              " vep_idx (%u)!", vep_session->vep.next_sid,
+                              vep_idx);
+              goto done;
+            }
+          ASSERT (next_session->vep.prev_sid == vep_idx);
+          next_session->vep.prev_sid = session_index;
+        }
+      /* Insert at the head of the chain. */
+      session->vep.next_sid = vep_session->vep.next_sid;
+      session->vep.prev_sid = vep_idx;
+      session->vep.vep_idx = vep_idx;
+      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
+      session->vep.ev = *event;
+      session->is_vep_session = 1;
+      vep_session->vep.next_sid = session_index;
+      if (VPPCOM_DEBUG > 1)
+        clib_warning ("EPOLL_CTL_ADD: vep_idx %u, sid %u, events 0x%x,"
+                      " data 0x%llx!", vep_idx, session_index,
+                      event->events, event->data.u64);
+      break;
+
+    case EPOLL_CTL_MOD:
+      if (PREDICT_FALSE (!event))
+        {
+          clib_warning ("NULL pointer to epoll_event structure!");
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      /* Either condition alone disqualifies the session. */
+      if (PREDICT_FALSE (!session->is_vep_session ||
+                         (session->vep.vep_idx != vep_idx)))
+        {
+          if (VPPCOM_DEBUG > 0)
+            {
+              if (!session->is_vep_session)
+                clib_warning ("EPOLL_CTL_MOD: session (%u) is not "
+                              "a vep session!", session_index);
+              else
+                clib_warning ("EPOLL_CTL_MOD: session (%u) vep_idx (%u) != "
+                              "vep_idx (%u)!", session_index,
+                              session->vep.vep_idx, vep_idx);
+            }
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
+      session->vep.ev = *event;
+      if (VPPCOM_DEBUG > 1)
+        clib_warning ("EPOLL_CTL_MOD: vep_idx %u, sid %u, events 0x%x,"
+                      " data 0x%llx!", vep_idx, session_index,
+                      event->events, event->data.u64);
+      break;
+
+    case EPOLL_CTL_DEL:
+      {
+        /* NULL prev_session means the vep itself is the predecessor;
+         * NULL next_session means the session is the chain tail. */
+        session_t *prev_session = 0;
+        session_t *next_session = 0;
+
+        /* Either condition alone disqualifies the session. */
+        if (PREDICT_FALSE (!session->is_vep_session ||
+                           (session->vep.vep_idx != vep_idx)))
+          {
+            if (VPPCOM_DEBUG > 0)
+              {
+                if (!session->is_vep_session)
+                  clib_warning ("EPOLL_CTL_DEL: session (%u) is not "
+                                "a vep session!", session_index);
+                else
+                  clib_warning ("EPOLL_CTL_DEL: session (%u) vep_idx (%u) "
+                                "!= vep_idx (%u)!", session_index,
+                                session->vep.vep_idx, vep_idx);
+              }
+            rv = VPPCOM_EINVAL;
+            goto done;
+          }
+
+        /* Resolve both neighbours before touching any link so that a
+         * failed lookup leaves the chain exactly as it was. */
+        if (session->vep.prev_sid != vep_idx)
+          {
+            rv = vppcom_session_at_index (session->vep.prev_sid,
+                                          &prev_session);
+            if (PREDICT_FALSE (rv))
+              {
+                if (VPPCOM_DEBUG > 0)
+                  clib_warning ("EPOLL_CTL_DEL: Invalid vep.prev_sid (%u) on"
+                                " sid (%u)!", session->vep.prev_sid,
+                                session_index);
+                goto done;
+              }
+            ASSERT (prev_session->vep.next_sid == session_index);
+          }
+        if (session->vep.next_sid != ~0)
+          {
+            rv = vppcom_session_at_index (session->vep.next_sid,
+                                          &next_session);
+            if (PREDICT_FALSE (rv))
+              {
+                if (VPPCOM_DEBUG > 0)
+                  clib_warning ("EPOLL_CTL_DEL: Invalid vep.next_sid (%u) on"
+                                " sid (%u)!", session->vep.next_sid,
+                                session_index);
+                goto done;
+              }
+            ASSERT (next_session->vep.prev_sid == session_index);
+          }
+
+        /* A pending wait continuation must not point at the session
+         * being removed; advance it to the successor. */
+        if (vep_session->wait_cont_idx == session_index)
+          vep_session->wait_cont_idx = session->vep.next_sid;
+
+        /* Unlink. */
+        if (prev_session)
+          prev_session->vep.next_sid = session->vep.next_sid;
+        else
+          vep_session->vep.next_sid = session->vep.next_sid;
+        if (next_session)
+          next_session->vep.prev_sid = session->vep.prev_sid;
+
+        memset (&session->vep, 0, sizeof (session->vep));
+        session->vep.next_sid = ~0;
+        session->vep.prev_sid = ~0;
+        session->vep.vep_idx = ~0;
+        session->is_vep_session = 0;
+        if (VPPCOM_DEBUG > 1)
+          clib_warning ("EPOLL_CTL_DEL: vep_idx %u, sid %u!", vep_idx,
+                        session_index);
+      }
+      break;
+
+    default:
+      clib_warning ("Invalid operation (%d)!", op);
+      rv = VPPCOM_EINVAL;
+      break;
+    }
+
+  vep_verify_epoll_chain (vep_idx);
+
+done:
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+  return rv;
+}
+
+/*
+ * Take vcm->sessions_lockp and resolve session index I into *S.
+ *
+ * The expansion site must provide an lvalue `rv` able to hold the
+ * result of vppcom_session_at_index() and a `done:` label.
+ *
+ * On success the macro falls through with the lock still held.  On
+ * failure the lock has already been released, rv holds the lookup
+ * error and control jumps to `done`; the code at `done` must not
+ * unlock again on that path.
+ *
+ * I is evaluated more than once and is stringized into the warning,
+ * so pass a plain variable name.
+ */
+#define VCL_LOCK_AND_GET_SESSION(I, S)                          \
+do {                                                            \
+  vppcom_main_t *vcm = &vppcom_main;                            \
+                                                                \
+  clib_spinlock_lock (&vcm->sessions_lockp);                    \
+  rv = vppcom_session_at_index ((I), (S));                      \
+  if (PREDICT_FALSE (rv))                                       \
+    {                                                           \
+      clib_spinlock_unlock (&vcm->sessions_lockp);              \
+                                                                \
+      if (VPPCOM_DEBUG > 0)                                     \
+        clib_warning ("ERROR: Invalid " #I " (%u)!", (I));      \
+                                                                \
+      goto done;                                                \
+    }                                                           \
+} while (0)
+
+/*
+ * Poll the sessions chained on a vep for readiness.
+ *
+ * vep_idx        session index of the vep
+ * events         caller array of at least maxevents entries; zeroed on
+ *                entry and filled from index 0
+ * maxevents      capacity of events[], must be > 0
+ * wait_for_time  non-negative time to keep polling while nothing is
+ *                ready, added to clib_time_now()
+ *
+ * The chain is scanned repeatedly (busy polling, the lock is only held
+ * around each individual session access) until at least one event was
+ * collected or the deadline passed.  When events[] fills up, the next
+ * session to look at is saved in the vep's wait_cont_idx so that a
+ * later call resumes there instead of at the head of the chain.
+ *
+ * Returns the number of events stored, 0 when the vep has no sessions
+ * or nothing became ready in time, VPPCOM_EINVAL on bad arguments or
+ * an inconsistent chain, or the vppcom_session_at_index() error when a
+ * session index cannot be resolved.
+ */
+int
+vppcom_epoll_wait (uint32_t vep_idx, struct epoll_event *events,
+                   int maxevents, double wait_for_time)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep_session;
+  int rv = VPPCOM_OK;
+  f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time;
+  int num_ev = 0;
+  u32 vep_next_sid, wait_cont_idx;
+  u8 is_vep;
+
+  if (PREDICT_FALSE (!events))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: NULL pointer to epoll_event array!");
+      return VPPCOM_EINVAL;
+    }
+  if (PREDICT_FALSE (maxevents <= 0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid maxevents (%d)!", maxevents);
+      return VPPCOM_EINVAL;
+    }
+  if (PREDICT_FALSE (wait_for_time < 0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid wait_for_time (%f)!", wait_for_time);
+      return VPPCOM_EINVAL;
+    }
+  memset (events, 0, sizeof (*events) * maxevents);
+
+  /* Snapshot the vep state, then drop the lock for the polling loop. */
+  VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+  vep_next_sid = vep_session->vep.next_sid;
+  is_vep = vep_session->is_vep;
+  wait_cont_idx = vep_session->wait_cont_idx;
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  if (PREDICT_FALSE (!is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+  /* Nothing to poll: return 0 right away.  Only the warning depends on
+   * the debug level, not the control flow. */
+  if (PREDICT_FALSE (vep_next_sid == ~0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("WARNING: vep_idx (%u) is empty!", vep_idx);
+      goto done;
+    }
+
+  do
+    {
+      u32 sid;
+      u32 next_sid = ~0;
+      session_t *session;
+
+      /* Resume where the previous call stopped, if it filled events[];
+       * otherwise start at the head of the chain. */
+      for (sid = (wait_cont_idx == ~0) ? vep_next_sid : wait_cont_idx;
+           sid != ~0; sid = next_sid)
+        {
+          u32 session_events, et_mask, clear_et_mask, session_vep_idx;
+          u8 add_event, is_vep_session;
+          int ready;
+          u64 session_ev_data;
+
+          /* Copy everything needed out of the session while locked;
+           * `session` must not be dereferenced after the unlock. */
+          VCL_LOCK_AND_GET_SESSION (sid, &session);
+          next_sid = session->vep.next_sid;
+          session_events = session->vep.ev.events;
+          et_mask = session->vep.et_mask;
+          is_vep = session->is_vep;
+          is_vep_session = session->is_vep_session;
+          session_vep_idx = session->vep.vep_idx;
+          session_ev_data = session->vep.ev.data.u64;
+          clib_spinlock_unlock (&vcm->sessions_lockp);
+
+          if (PREDICT_FALSE (is_vep))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("ERROR: sid (%u) is a vep!", sid);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+          if (PREDICT_FALSE (!is_vep_session))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("ERROR: session (%u) is not "
+                              "a vep session!", sid);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+          if (PREDICT_FALSE (session_vep_idx != vep_idx))
+            {
+              clib_warning ("ERROR: session (%u) "
+                            "vep_idx (%u) != vep_idx (%u)!",
+                            sid, session_vep_idx, vep_idx);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+
+          /* NOTE(review): clear_et_mask collects the directions that
+           * fired on an EPOLLET session but is never written back to
+           * session->vep.et_mask, so edge-triggered sessions are
+           * reported like level-triggered ones -- confirm intent. */
+          add_event = clear_et_mask = 0;
+
+          if ((EPOLLIN & session_events) && (EPOLLIN & et_mask))
+            {
+              VCL_LOCK_AND_GET_SESSION (sid, &session);
+              ready = vppcom_session_read_ready (session, sid);
+              clib_spinlock_unlock (&vcm->sessions_lockp);
+              if (ready > 0)
+                {
+                  add_event = 1;
+                  events[num_ev].events |= EPOLLIN;
+                  if (EPOLLET & session_events)
+                    clear_et_mask |= EPOLLIN;
+                }
+              else if (ready < 0)
+                {
+                  add_event = 1;
+                  switch (ready)
+                    {
+                    case VPPCOM_ECONNRESET:
+                      events[num_ev].events |= EPOLLHUP | EPOLLRDHUP;
+                      break;
+
+                    default:
+                      events[num_ev].events |= EPOLLERR;
+                      break;
+                    }
+                }
+            }
+
+          if ((EPOLLOUT & session_events) && (EPOLLOUT & et_mask))
+            {
+              VCL_LOCK_AND_GET_SESSION (sid, &session);
+              ready = vppcom_session_write_ready (session, sid);
+              clib_spinlock_unlock (&vcm->sessions_lockp);
+              if (ready > 0)
+                {
+                  add_event = 1;
+                  events[num_ev].events |= EPOLLOUT;
+                  if (EPOLLET & session_events)
+                    clear_et_mask |= EPOLLOUT;
+                }
+              else if (ready < 0)
+                {
+                  add_event = 1;
+                  switch (ready)
+                    {
+                    case VPPCOM_ECONNRESET:
+                      events[num_ev].events |= EPOLLHUP;
+                      break;
+
+                    default:
+                      events[num_ev].events |= EPOLLERR;
+                      break;
+                    }
+                }
+            }
+
+          if (add_event)
+            {
+              events[num_ev].data.u64 = session_ev_data;
+              /* One-shot: disarm the session until it is re-armed. */
+              if (EPOLLONESHOT & session_events)
+                {
+                  VCL_LOCK_AND_GET_SESSION (sid, &session);
+                  session->vep.ev.events = 0;
+                  clib_spinlock_unlock (&vcm->sessions_lockp);
+                }
+              num_ev++;
+              if (num_ev == maxevents)
+                {
+                  /* events[] is full: remember where to resume. */
+                  VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+                  vep_session->wait_cont_idx = next_sid;
+                  clib_spinlock_unlock (&vcm->sessions_lockp);
+                  goto done;
+                }
+            }
+          /* A resumed scan started mid-chain: wrap from the tail to
+           * the head and stop once back at the resume point. */
+          if (wait_cont_idx != ~0)
+            {
+              if (next_sid == ~0)
+                next_sid = vep_next_sid;
+              else if (next_sid == wait_cont_idx)
+                next_sid = ~0;
+            }
+        }
+    }
+  while ((num_ev == 0) && (clib_time_now (&vcm->clib_time) <= timeout));
+
+  /* The resumed scan completed: the next call starts at the head. */
+  if (wait_cont_idx != ~0)
+    {
+      VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+      vep_session->wait_cont_idx = ~0;
+      clib_spinlock_unlock (&vcm->sessions_lockp);
+    }
+done:
+  return (rv != VPPCOM_OK) ? rv : num_ev;
+}
+
+/*
+ * Get or set an attribute of a session.
+ *
+ * session_index  session to operate on
+ * op             one of the VPPCOM_ATTR_* operations
+ * buffer         in/out value: u32 flags for the *_FLAGS ops,
+ *                vppcom_endpt_t for the *_ADDR ops, unused otherwise
+ * buflen         in: size of buffer; out: bytes written by GET ops
+ *
+ * Returns VPPCOM_OK on success (for VPPCOM_ATTR_GET_NREAD the result of
+ * vppcom_session_read_ready()), VPPCOM_EINVAL when the buffer is
+ * missing or too small or op is not recognised, or the
+ * vppcom_session_at_index() error when session_index is invalid.
+ *
+ * The REUSEADDR/BROADCAST/V6ONLY/KEEPALIVE/TCP_KEEP* set operations and
+ * VPPCOM_ATTR_PEEK_NREAD are accepted but currently do nothing.
+ */
+int
+vppcom_session_attr (uint32_t session_index, uint32_t op,
+                     void *buffer, uint32_t * buflen)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *session;
+  int rv = VPPCOM_OK;
+  u32 *flags = buffer;
+  vppcom_endpt_t *ep = buffer;
+
+  VCL_LOCK_AND_GET_SESSION (session_index, &session);
+  switch (op)
+    {
+    case VPPCOM_ATTR_GET_NREAD:
+      rv = vppcom_session_read_ready (session, session_index);
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("VPPCOM_ATTR_GET_NREAD: nread = %d", rv);
+
+      break;
+
+    case VPPCOM_ATTR_PEEK_NREAD:
+      /* TBD */
+      break;
+
+    case VPPCOM_ATTR_GET_FLAGS:
+      if (buffer && buflen && (*buflen >= sizeof (*flags)))
+        {
+          *flags = O_RDWR | ((session->is_nonblocking) ? O_NONBLOCK : 0);
+          *buflen = sizeof (*flags);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_FLAGS: flags = 0x%08x, "
+                          "is_nonblocking = %u", *flags,
+                          session->is_nonblocking);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_SET_FLAGS:
+      if (buffer && buflen && (*buflen >= sizeof (*flags)))
+        {
+          /* Only O_NONBLOCK is honoured; other bits are ignored. */
+          session->is_nonblocking = (*flags & O_NONBLOCK) ? 1 : 0;
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_SET_FLAGS: flags = 0x%08x, "
+                          "is_nonblocking = %u", *flags,
+                          session->is_nonblocking);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_GET_PEER_ADDR:
+      if (buffer && buflen && (*buflen >= sizeof (*ep)))
+        {
+          ep->vrf = session->vrf;
+          ep->is_ip4 = session->peer_addr.is_ip4;
+          ep->port = session->port;
+          if (session->peer_addr.is_ip4)
+            clib_memcpy (ep->ip, &session->peer_addr.ip46.ip4,
+                         sizeof (ip4_address_t));
+          else
+            clib_memcpy (ep->ip, &session->peer_addr.ip46.ip6,
+                         sizeof (ip6_address_t));
+          *buflen = sizeof (*ep);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_PEER_ADDR: is_ip4 = %u, "
+                          "addr = %U", ep->is_ip4, format_ip46_address,
+                          &session->peer_addr.ip46, ep->is_ip4);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_GET_LCL_ADDR:
+      if (buffer && buflen && (*buflen >= sizeof (*ep)))
+        {
+          ep->vrf = session->vrf;
+          ep->is_ip4 = session->lcl_addr.is_ip4;
+          ep->port = session->port;
+          if (session->lcl_addr.is_ip4)
+            clib_memcpy (ep->ip, &session->lcl_addr.ip46.ip4,
+                         sizeof (ip4_address_t));
+          else
+            clib_memcpy (ep->ip, &session->lcl_addr.ip46.ip6,
+                         sizeof (ip6_address_t));
+          *buflen = sizeof (*ep);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_LCL_ADDR: is_ip4 = %u, "
+                          "addr = %U", ep->is_ip4, format_ip46_address,
+                          &session->lcl_addr.ip46, ep->is_ip4);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_SET_REUSEADDR:
+      break;
+
+    case VPPCOM_ATTR_SET_BROADCAST:
+      break;
+
+    case VPPCOM_ATTR_SET_V6ONLY:
+      break;
+
+    case VPPCOM_ATTR_SET_KEEPALIVE:
+      break;
+
+    case VPPCOM_ATTR_SET_TCP_KEEPIDLE:
+      break;
+
+    case VPPCOM_ATTR_SET_TCP_KEEPINTVL:
+      break;
+
+    default:
+      rv = VPPCOM_EINVAL;
+      break;
+    }
+
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  /* VCL_LOCK_AND_GET_SESSION jumps here with the lock already
+   * released, so the label has to sit after the unlock above. */
+done:
+  return rv;
+}
+
+  /*
+   * fd.io coding-style-patch-verification: ON
+   *
+   * Local Variables:
+   * eval: (c-set-style "gnu")
+   * End:
+   */