2 * mc_socket.c: socket based multicast for vlib mc
4 * Copyright (c) 2010 Cisco and/or its affiliates.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include <vlib/vlib.h>
19 #include <vlib/unix/mc_socket.h>
21 #include <sys/ioctl.h> /* for FIONBIO */
22 #include <netinet/tcp.h> /* for TCP_NODELAY */
23 #include <net/if.h> /* for struct ifreq */
/* Format function: consumes one u64 vararg holding a peer id and appends
   "<address>:<port>" to s.  The address is rendered by format_network_address
   as AF_INET; the port is passed through ntohs() and printed in hex. */
26 format_socket_peer_id (u8 * s, va_list * args)
28 u64 peer_id_as_u64 = va_arg (*args, u64);
30 peer_id.as_u64 = peer_id_as_u64;
/* Split the 64-bit id into its address and port parts. */
31 u32 a = mc_socket_peer_id_get_address (peer_id);
32 u32 p = mc_socket_peer_id_get_port (peer_id);
/* The ntohs() here suggests the port is kept in network byte order inside
   the peer id -- TODO confirm against mc_socket_set_peer_id(). */
34 s = format (s, "%U:%04x", format_network_address, AF_INET, &a, ntohs (p));
/* Signature of the message handlers dispatched through msg_handler(), which
   calls them with the mc main, a pointer to the message and the index of the
   buffer that carries it. */
39 typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
/* Hand one received buffer to a message handler.  _h is used as an
   mc_msg_handler_t pointer and is called with a pointer to the buffer's
   current data plus the buffer index. */
43 msg_handler (mc_main_t * mcm,
44 u32 buffer_index, u32 handler_frees_buffer, void *_h)
46 vlib_main_t *vm = mcm->vlib_main;
47 mc_msg_handler_t *h = _h;
48 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
49 void *the_msg = vlib_buffer_get_current (b);
51 h (mcm, the_msg, buffer_index);
/* If the handler does not take ownership of the buffer, release it here. */
52 if (!handler_frees_buffer)
53 vlib_buffer_free_one (vm, buffer_index);
/* Describe the buffer starting at buffer_index as iovec entries appended to
   *iovs_return, so it can be handed to sendmsg().  sendmsg_helper() uses the
   return value as the number of bytes described. */
57 append_buffer_index_to_iovec (vlib_main_t * vm,
58 u32 buffer_index, struct iovec **iovs_return)
62 u32 bi = buffer_index;
67 b = vlib_get_buffer (vm, bi);
/* One iovec per buffer: base is the buffer's current data pointer, length is
   its current_length. */
68 vec_add2 (*iovs_return, i, 1);
69 i->iov_base = vlib_buffer_get_current (b);
70 i->iov_len = b->current_length;
/* True when this buffer has no successor in its chain. */
72 if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
/* Send the buffer identified by buffer_index to tx_addr on the given socket
   with sendmsg(), using msm->iovecs as the scatter/gather list. */
81 sendmsg_helper (mc_socket_main_t * msm,
82 int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
84 vlib_main_t *vm = msm->mc_main.vlib_main;
86 word n_bytes, n_bytes_tx, n_retries;
88 clib_memset (&h, 0, sizeof (h));
90 h.msg_namelen = sizeof (tx_addr[0]);
/* Reuse the iovec vector from the previous call: reset its length only. */
93 _vec_len (msm->iovecs) = 0;
95 n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
/* The message must fit in transport.max_packet_size, which
   mc_socket_main_init() sets from the interface MTU. */
96 ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
97 if (n_bytes > msm->mc_main.transport.max_packet_size)
98 clib_error ("sending packet larger than interace MTU %d bytes", n_bytes);
100 h.msg_iov = msm->iovecs;
101 h.msg_iovlen = vec_len (msm->iovecs);
/* Loop while sendmsg() reports something other than the full byte count. */
104 while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
/* A send that still did not cover the whole message is reported as a
   warning. */
107 if (n_bytes_tx != n_bytes)
109 clib_unix_warning ("sendmsg");
/* Record the number of retries in the event log. */
114 ELOG_TYPE_DECLARE (e) =
116 .format = "sendmsg-helper: %d retries",.format_args = "i4",};
122 ed = ELOG_DATA (&vm->elog_main, e);
123 ed->retries = n_retries;
/* Transport tx_buffer hook (installed in mc_socket_main_init()): sends
   buffer_index on the multicast socket that corresponds to the transport
   type, addressed to that socket's tx_addr. */
128 static clib_error_t *
129 tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
131 mc_socket_main_t *msm = (mc_socket_main_t *) transport;
132 vlib_main_t *vm = msm->mc_main.vlib_main;
133 mc_multicast_socket_t *ms = &msm->multicast_sockets[type];
135 error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
/* The buffer is freed here for every type except
   MC_TRANSPORT_USER_REQUEST_TO_RELAY, whose buffer is left to the caller. */
136 if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
137 vlib_buffer_free_one (vm, buffer_index);
/* Transport tx_ack hook (installed in mc_socket_main_init()): sends
   buffer_index from the ack socket to the address and port encoded in
   dest_peer_id, then frees the buffer. */
141 static clib_error_t *
142 tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
144 struct sockaddr_in tx_addr;
145 mc_socket_main_t *msm = (mc_socket_main_t *) transport;
146 vlib_main_t *vm = msm->mc_main.vlib_main;
/* Build the destination address from the peer id; address and port are
   stored into the sockaddr without any byte-order conversion. */
149 clib_memset (&tx_addr, 0, sizeof (tx_addr));
150 tx_addr.sin_family = AF_INET;
151 tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
152 tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
154 error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
155 vlib_buffer_free_one (vm, buffer_index);
/* Receive one message from a socket into vlib buffers taken from the
   msm->rx_buffers cache.  On success *buffer_index is set to the first
   buffer of the received message and 0 is returned; a recvmsg() failure is
   returned as a unix error.  drop_message is supplied by callers that do not
   want to keep the message (see to_relay_socket_read_ready()). */
159 static clib_error_t *
160 recvmsg_helper (mc_socket_main_t * msm,
162 struct sockaddr_in *rx_addr,
163 u32 * buffer_index, u32 drop_message)
165 vlib_main_t *vm = msm->mc_main.vlib_main;
167 uword n_left, n_alloc, n_mtu, i, i_rx;
168 const uword buffer_size = vlib_buffer_get_default_data_size (vm);
171 /* Make sure we have at least an MTU's worth of buffers. */
172 n_mtu = msm->rx_mtu_n_buffers;
173 n_left = vec_len (msm->rx_buffers);
/* Refill target is eight MTUs' worth of buffers; the vector length is then
   set to what was already cached plus n_alloc. */
176 uword max_alloc = 8 * n_mtu;
177 vec_validate (msm->rx_buffers, max_alloc - 1);
179 vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
180 _vec_len (msm->rx_buffers) = n_left + n_alloc;
183 ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
184 vec_validate (msm->iovecs, n_mtu - 1);
186 /* Allocate RX buffers from end of rx_buffers.
187 Turn them into iovecs to pass to readv. */
/* iovec[i] maps to rx_buffers[i_rx - i], i.e. the cache is consumed from its
   tail backwards; each iovec covers one default-sized buffer data area. */
188 i_rx = vec_len (msm->rx_buffers) - 1;
189 for (i = 0; i < n_mtu; i++)
191 b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
192 msm->iovecs[i].iov_base = b->data;
193 msm->iovecs[i].iov_len = buffer_size;
195 _vec_len (msm->iovecs) = n_mtu;
200 clib_memset (&h, 0, sizeof (h));
/* Ask the kernel to report the sender's address in *rx_addr. */
203 h.msg_name = rx_addr;
204 h.msg_namelen = sizeof (rx_addr[0]);
206 h.msg_iov = msm->iovecs;
207 h.msg_iovlen = vec_len (msm->iovecs);
209 n_bytes_left = recvmsg (socket, &h, 0);
210 if (n_bytes_left < 0)
211 return clib_error_return_unix (0, "recvmsg");
/* The message starts in the last cached buffer. */
220 *buffer_index = msm->rx_buffers[i_rx];
/* Walk the buffers that received data: each takes at most buffer_size bytes
   and is linked to the next one until the received byte count is used up. */
223 b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
228 n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
230 n_bytes_left -= buffer_size;
232 if (n_bytes_left <= 0)
236 b->flags |= VLIB_BUFFER_NEXT_PRESENT;
237 b->next_buffer = msm->rx_buffers[i_rx];
/* Shrink the cache so that it ends at i_rx. */
240 _vec_len (msm->rx_buffers) = i_rx;
242 return 0 /* no error */ ;
/* Read-ready callback for the mastership multicast socket (registered in
   socket_setup()): receives one message with recvmsg_helper() and passes it
   to mc_msg_master_assert_handler. */
245 static clib_error_t *
246 mastership_socket_read_ready (clib_file_t * uf)
248 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
249 mc_main_t *mcm = &msm->mc_main;
250 mc_multicast_socket_t *ms =
251 &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
255 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
/* handler_frees_buffer is 0, so msg_handler() frees the buffer once the
   handler has returned. */
258 msg_handler (mcm, bi,
259 /* handler_frees_buffer */ 0,
260 mc_msg_master_assert_handler);
/* Read-ready callback for the "to relay" multicast socket (registered in
   socket_setup()).  Only the relay master acts on these messages: it stamps
   them with a global sequence number and re-sends them on the "from relay"
   socket. */
265 static clib_error_t *
266 to_relay_socket_read_ready (clib_file_t * uf)
268 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
269 mc_main_t *mcm = &msm->mc_main;
270 vlib_main_t *vm = msm->mc_main.vlib_main;
271 mc_multicast_socket_t *ms_to_relay =
272 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
273 mc_multicast_socket_t *ms_from_relay =
274 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
277 u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
279 /* If we are not the ordering master, ask recvmsg_helper to drop the msg. */
280 error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
281 /* drop_message */ !is_master);
283 /* If we are the master, number and rebroadcast the msg. */
284 if (!error && is_master)
286 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
287 mc_msg_user_request_t *mp = vlib_buffer_get_current (b);
/* The sequence number goes on the wire in network byte order; the local
   counter advances once per relayed message. */
288 mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
289 mcm->relay_global_sequence++;
/* The result of the re-send is not checked; the buffer is freed either way. */
291 sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr,
293 vlib_buffer_free_one (vm, bi);
/* Read-ready callback for the "from relay" multicast socket (registered in
   socket_setup()): receives one message and passes it to
   mc_msg_user_request_handler. */
299 static clib_error_t *
300 from_relay_socket_read_ready (clib_file_t * uf)
302 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
303 mc_main_t *mcm = &msm->mc_main;
304 mc_multicast_socket_t *ms =
305 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
309 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
/* handler_frees_buffer is 1: msg_handler() does not free the buffer, the
   handler is responsible for it. */
313 msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
314 mc_msg_user_request_handler);
/* Read-ready callback for the join multicast socket (registered in
   socket_setup()): receives one message and dispatches it on its type field
   to the join/leave request handler or the join reply handler. */
319 static clib_error_t *
320 join_socket_read_ready (clib_file_t * uf)
322 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
323 mc_main_t *mcm = &msm->mc_main;
324 vlib_main_t *vm = mcm->vlib_main;
325 mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
329 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
/* The message is read through the join/leave request layout to get at its
   type field, which is byte-swapped before being compared. */
333 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
334 mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);
336 switch (clib_host_to_net_u32 (mp->type))
/* In both cases msg_handler() frees the buffer after the handler returns. */
338 case MC_MSG_TYPE_join_or_leave_request:
339 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
340 mc_msg_join_or_leave_request_handler);
343 case MC_MSG_TYPE_join_reply:
344 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
345 mc_msg_join_reply_handler);
/* Read-ready callback for the ack socket (registered in socket_setup()):
   receives one message and passes it to mc_msg_user_ack_handler;
   msg_handler() frees the buffer afterwards. */
356 static clib_error_t *
357 ack_socket_read_ready (clib_file_t * uf)
359 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
360 mc_main_t *mcm = &msm->mc_main;
364 error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
365 /* drop_message */ 0);
367 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
368 mc_msg_user_ack_handler);
/* Tear down one catchup session: forget the fd -> catchup mapping, delete
   the file from the file main, free both data vectors and return c to the
   catchup pool.  c must be a valid pool element; it is dereferenced
   unconditionally. */
373 catchup_cleanup (mc_socket_main_t * msm,
374 mc_socket_catchup_t * c, clib_file_main_t * um,
/* The hash entry is removed first, while uf->file_descriptor is still the
   registered descriptor. */
377 hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
378 clib_file_del (um, uf);
379 vec_free (c->input_vector);
380 vec_free (c->output_vector);
381 pool_put (msm->catchups, c);
/* Map a file descriptor to its catchup session through
   msm->catchup_index_by_file_descriptor.  Returns 0 when the descriptor has
   no entry in the hash. */
384 static mc_socket_catchup_t *
385 find_catchup_from_file_descriptor (mc_socket_main_t * msm,
389 hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
390 return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
/* Shared read path for catchup TCP connections.  Incoming bytes accumulate
   in c->input_vector; the accumulated data is handed to the catchup request
   handler or the catchup reply handler (presumably chosen by is_server --
   TODO confirm). */
393 static clib_error_t *
394 catchup_socket_read_ready (clib_file_t * uf, int is_server)
396 clib_file_main_t *um = &file_main;
397 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
398 mc_main_t *mcm = &msm->mc_main;
/* NOTE(review): the lookup returns 0 for an unknown descriptor and c is
   used below without a visible check. */
399 mc_socket_catchup_t *c =
400 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
/* Grow the input vector by 4096 bytes and read into the newly added tail. */
403 l = vec_len (c->input_vector);
404 vec_resize (c->input_vector, 4096);
406 read (uf->file_descriptor, c->input_vector + l,
407 vec_len (c->input_vector) - l);
/* Read failure path: the session is torn down and the error returned. */
416 catchup_cleanup (msm, c, um, uf);
417 return clib_error_return_unix (0, "read");
/* Trim the vector back to the old length plus n (presumably the byte count
   read() returned). */
421 _vec_len (c->input_vector) = l + n;
/* Only act once is_eof is set and some data has been collected. */
423 if (is_eof && vec_len (c->input_vector) > 0)
/* Request path: the vector is reset and kept for reuse afterwards. */
427 mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
429 _vec_len (c->input_vector) = 0;
/* Reply path: ownership of the vector passes to the handler, then the
   session is torn down. */
433 mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
435 c->input_vector = 0; /* reply handler is responsible for freeing vector */
436 catchup_cleanup (msm, c, um, uf);
440 return 0 /* no error */ ;
443 static clib_error_t *
444 catchup_server_read_ready (clib_file_t * uf)
446 return catchup_socket_read_ready (uf, /* is_server */ 1);
449 static clib_error_t *
450 catchup_client_read_ready (clib_file_t * uf)
452 if (MC_EVENT_LOGGING)
454 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
455 vlib_main_t *vm = msm->mc_main.vlib_main;
457 ELOG_TYPE (e, "catchup_client_read_ready");
458 ELOG (&vm->elog_main, e, 0);
460 return catchup_socket_read_ready (uf, /* is_server */ 0);
/* Shared write path for catchup TCP connections.  On the first call after a
   non-blocking connect() it checks the connect result; after that it writes
   c->output_vector out in pieces and, once everything is written, stops
   write polling and half-closes the socket. */
463 static clib_error_t *
464 catchup_socket_write_ready (clib_file_t * uf, int is_server)
466 clib_file_main_t *um = &file_main;
467 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
/* NOTE(review): the lookup returns 0 for an unknown descriptor and c is
   used below without a visible check. */
468 mc_socket_catchup_t *c =
469 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
470 clib_error_t *error = 0;
/* Becoming writable ends a pending connect; SO_ERROR carries its outcome. */
473 if (c->connect_in_progress)
477 c->connect_in_progress = 0;
478 len = sizeof (value);
479 if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &value, &len) < 0)
481 error = clib_error_return_unix (0, "getsockopt SO_ERROR");
487 clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID,
/* Each write is capped at the remaining output or the receive MTU less a
   64-byte header allowance, whichever is smaller. */
498 clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
499 msm->rx_mtu_n_bytes -
500 64 /* ip + tcp + option allowance */ );
502 if (n_this_write <= 0)
507 n = write (uf->file_descriptor,
508 c->output_vector + c->output_vector_n_written,
/* The write is repeated for as long as it fails with EAGAIN. */
511 while (n < 0 && errno == EAGAIN);
515 error = clib_error_return_unix (0, "write");
518 c->output_vector_n_written += n;
/* Everything has been written: stop asking for write-ready events. */
521 if (c->output_vector_n_written >= vec_len (c->output_vector))
525 uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
526 file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
527 /* Send EOF to other side. */
528 shutdown (uf->file_descriptor, SHUT_WR);
534 catchup_cleanup (msm, c, um, uf);
540 static clib_error_t *
541 catchup_server_write_ready (clib_file_t * uf)
543 return catchup_socket_write_ready (uf, /* is_server */ 1);
546 static clib_error_t *
547 catchup_client_write_ready (clib_file_t * uf)
549 return catchup_socket_write_ready (uf, /* is_server */ 0);
552 static clib_error_t *
553 catchup_socket_error_ready (clib_file_t * uf)
555 clib_file_main_t *um = &file_main;
556 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
557 mc_socket_catchup_t *c =
558 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
559 catchup_cleanup (msm, c, um, uf);
560 return clib_error_return (0, "error");
/* Read-ready callback for the catchup listening socket (registered in
   socket_setup()): accepts one connection, creates a catchup session for it
   and registers the new descriptor with the server-side catchup callbacks. */
563 static clib_error_t *
564 catchup_listen_read_ready (clib_file_t * uf)
566 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
567 struct sockaddr_in client_addr;
569 mc_socket_catchup_t *c;
570 clib_file_t template = { 0 };
/* The session is allocated and zeroed before accept() so that the accepted
   descriptor can be stored in it directly. */
572 pool_get (msm->catchups, c);
573 clib_memset (c, 0, sizeof (c[0]));
575 client_len = sizeof (client_addr);
577 /* Acquires the non-blocking attrib from the server socket. */
578 c->socket = accept (uf->file_descriptor,
579 (struct sockaddr *) &client_addr,
580 (socklen_t *) & client_len);
/* accept() failure path: give the unused session back to the pool. */
584 pool_put (msm->catchups, c);
585 return clib_error_return_unix (0, "accept");
588 if (MC_EVENT_LOGGING)
590 mc_main_t *mcm = &msm->mc_main;
591 vlib_main_t *vm = mcm->vlib_main;
593 ELOG_TYPE_DECLARE (e) =
595 .format = "catchup accepted from 0x%lx",.format_args = "i4",};
/* The client address is logged in host byte order. */
601 ed = ELOG_DATA (&vm->elog_main, e);
602 ed->addr = ntohl (client_addr.sin_addr.s_addr);
605 /* Disable the Nagle algorithm, ship catchup pkts immediately */
608 if ((setsockopt (c->socket, IPPROTO_TCP,
609 TCP_NODELAY, (void *) &one, sizeof (one))) < 0)
611 clib_unix_warning ("catchup socket: set TCP_NODELAY");
/* Register the connection for polling and remember which session belongs to
   its descriptor. */
615 template.read_function = catchup_server_read_ready;
616 template.write_function = catchup_server_write_ready;
617 template.error_function = catchup_socket_error_ready;
618 template.file_descriptor = c->socket;
619 template.private_data = pointer_to_uword (msm);
620 c->clib_file_index = clib_file_add (&file_main, &template);
621 hash_set (msm->catchup_index_by_file_descriptor, c->socket,
627 /* Bind sock to the first port at or above `port` that bind() accepts, on
   INADDR_ANY.  Returns that port in host byte order, or -1 when the search
   runs past 65535. */
629 find_and_bind_to_free_port (word sock, word port)
631 for (; port < 1 << 16; port++)
633 struct sockaddr_in a;
635 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
637 a.sin_family = PF_INET;
638 a.sin_addr.s_addr = INADDR_ANY;
639 a.sin_port = htons (port);
/* A successful bind() marks this port as the one to return. */
641 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
645 return port < 1 << 16 ? port : -1;
/* Create and configure one multicast UDP socket in ms: set the multicast
   TTL and SO_REUSEADDR, bind to the multicast group address on udp_port,
   join the group on the configured interface address and switch the socket
   to non-blocking mode.  `type` is only used to label error messages.
   Returns a unix error for the first step that fails. */
648 static clib_error_t *
649 setup_mutlicast_socket (mc_socket_main_t * msm,
650 mc_multicast_socket_t * ms,
651 char *type, uword udp_port)
654 struct ip_mreq mcast_req;
/* An unset TTL defaults to 1. */
656 if (!msm->multicast_ttl)
657 msm->multicast_ttl = 1;
659 /* UDP socket for this transport type */
660 if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
661 return clib_error_return_unix (0, "%s socket", type);
/* IP_MULTICAST_TTL is passed as a single byte. */
664 u8 ttl = msm->multicast_ttl;
666 if ((setsockopt (ms->socket, IPPROTO_IP,
667 IP_MULTICAST_TTL, (void *) &ttl, sizeof (ttl))) < 0)
668 return clib_error_return_unix (0, "%s set multicast ttl", type);
671 if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)) <
673 return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
/* tx_addr is both the destination for sends (see tx_buffer()) and the
   address the socket is bound to. */
675 clib_memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
676 ms->tx_addr.sin_family = AF_INET;
677 ms->tx_addr.sin_addr.s_addr =
678 htonl (msm->multicast_tx_ip4_address_host_byte_order);
679 ms->tx_addr.sin_port = htons (udp_port);
681 if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
682 sizeof (ms->tx_addr)) < 0)
683 return clib_error_return_unix (0, "%s bind", type);
/* Join the multicast group on the interface whose address was discovered in
   mc_socket_main_init(). */
685 clib_memset (&mcast_req, 0, sizeof (mcast_req));
686 mcast_req.imr_multiaddr.s_addr =
687 htonl (msm->multicast_tx_ip4_address_host_byte_order);
688 mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
690 if ((setsockopt (ms->socket, IPPROTO_IP,
691 IP_ADD_MEMBERSHIP, (void *) &mcast_req,
692 sizeof (mcast_req))) < 0)
693 return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
696 if (ioctl (ms->socket, FIONBIO, &one) < 0)
697 return clib_error_return_unix (0, "%s set FIONBIO", type);
699 /* FIXME remove this when we support tx_ready. */
/* Sets the socket send buffer size; unlike the steps above, a failure here
   goes through clib_unix_error() instead of being returned. */
702 socklen_t sl = sizeof (len);
703 if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
704 clib_unix_error ("setsockopt");
/* Create every socket the transport needs -- the four multicast sockets, the
   UDP ack socket and the TCP catchup listening socket -- and register each
   with file_main so its read-ready callback is invoked. */
710 static clib_error_t *
711 socket_setup (mc_socket_main_t * msm)
/* When no base port is configured, a default near the top of the 16-bit
   port range is derived from the number of sockets needed. */
717 if (!msm->base_multicast_udp_port_host_byte_order)
718 msm->base_multicast_udp_port_host_byte_order =
719 0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
722 port = msm->base_multicast_udp_port_host_byte_order;
/* One multicast socket per transport type. */
724 error = setup_mutlicast_socket (msm,
725 &msm->multicast_sockets
726 [MC_TRANSPORT_MASTERSHIP], "mastership",
731 error = setup_mutlicast_socket (msm,
732 &msm->multicast_sockets[MC_TRANSPORT_JOIN],
737 error = setup_mutlicast_socket (msm,
738 &msm->multicast_sockets
739 [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
744 error = setup_mutlicast_socket (msm,
745 &msm->multicast_sockets
746 [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
747 "from relay", port++);
/* Unicast UDP socket for acks, bound to the first free port at or above the
   next candidate. */
752 msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
753 if (msm->ack_socket < 0)
754 return clib_error_return_unix (0, "ack socket");
/* NOTE(review): find_and_bind_to_free_port() can return -1; the result is
   stored without a check here. */
756 msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);
758 if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
759 return clib_error_return_unix (0, "ack socket FIONBIO");
/* Non-blocking TCP listening socket for catchup connections. */
761 msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
762 if (msm->catchup_server_socket < 0)
763 return clib_error_return_unix (0, "catchup server socket");
765 msm->catchup_tcp_port =
766 find_and_bind_to_free_port (msm->catchup_server_socket, port++);
768 if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
769 return clib_error_return_unix (0, "catchup server socket FIONBIO");
771 if (listen (msm->catchup_server_socket, 5) < 0)
772 return clib_error_return_unix (0, "catchup server socket listen");
774 /* epoll setup for multicast mastership socket */
/* The same template is reused for every registration below; only the read
   function and the descriptor change. */
776 clib_file_t template = { 0 };
778 template.read_function = mastership_socket_read_ready;
779 template.file_descriptor =
780 msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
781 template.private_data = (uword) msm;
782 clib_file_add (&file_main, &template);
784 /* epoll setup for multicast to_relay socket */
785 template.read_function = to_relay_socket_read_ready;
786 template.file_descriptor =
787 msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
788 template.private_data = (uword) msm;
789 clib_file_add (&file_main, &template);
791 /* epoll setup for multicast from_relay socket */
792 template.read_function = from_relay_socket_read_ready;
793 template.file_descriptor =
794 msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
795 template.private_data = (uword) msm;
796 clib_file_add (&file_main, &template);
/* epoll setup for multicast join socket */
798 template.read_function = join_socket_read_ready;
799 template.file_descriptor =
800 msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
801 template.private_data = (uword) msm;
802 clib_file_add (&file_main, &template);
804 /* epoll setup for ack rx socket */
805 template.read_function = ack_socket_read_ready;
806 template.file_descriptor = msm->ack_socket;
807 template.private_data = (uword) msm;
808 clib_file_add (&file_main, &template);
810 /* epoll setup for TCP catchup server */
811 template.read_function = catchup_listen_read_ready;
812 template.file_descriptor = msm->catchup_server_socket;
813 template.private_data = (uword) msm;
814 clib_file_add (&file_main, &template);
/* Queue output on a catchup connection.  If set_output_vector is non-zero
   it replaces c->output_vector; n_bytes are then added at the end of the
   vector, and `result` is set to point at them.  Whenever the vector is
   non-empty, write polling is requested for the connection's file. */
821 catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
822 u8 * set_output_vector)
824 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
828 if (set_output_vector)
829 c->output_vector = set_output_vector;
831 vec_add2 (c->output_vector, result, n_bytes);
832 if (vec_len (c->output_vector) > 0)
/* skip_update records whether the write flag was already set before this
   call. */
834 int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
835 uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
837 file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
/* Transport catchup_request_fun hook (installed in mc_socket_main_init()):
   opens a non-blocking TCP connection to catchup_peer_id, registers it with
   the client-side catchup callbacks and queues a catchup request for
   stream_index.  Returns the index of the new session in msm->catchups. */
843 catchup_request_fun (void *transport_main,
844 u32 stream_index, mc_peer_id_t catchup_peer_id)
846 mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
847 mc_main_t *mcm = &msm->mc_main;
848 vlib_main_t *vm = mcm->vlib_main;
849 mc_socket_catchup_t *c;
850 struct sockaddr_in addr;
851 clib_file_main_t *um = &file_main;
854 pool_get (msm->catchups, c);
855 clib_memset (c, 0, sizeof (*c));
857 c->socket = socket (AF_INET, SOCK_STREAM, 0);
860 clib_unix_warning ("socket");
864 if (ioctl (c->socket, FIONBIO, &one) < 0)
866 clib_unix_warning ("FIONBIO");
/* Address and port are copied from the peer id without any byte-order
   conversion. */
870 clib_memset (&addr, 0, sizeof (addr));
871 addr.sin_family = AF_INET;
872 addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
873 addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
/* Checked by catchup_socket_write_ready() on the first write-ready event. */
875 c->connect_in_progress = 1;
877 if (MC_EVENT_LOGGING)
879 ELOG_TYPE_DECLARE (e) =
881 .format = "connecting to peer 0x%Lx",.format_args = "i8",};
886 ed = ELOG_DATA (&vm->elog_main, e);
887 ed->peer = catchup_peer_id.as_u64;
/* EINPROGRESS is the expected outcome of connect() on a non-blocking
   socket; any other failure is reported. */
890 if (connect (c->socket, (const void *) &addr, sizeof (addr))
891 < 0 && errno != EINPROGRESS)
893 clib_unix_warning ("connect to %U fails",
894 format_socket_peer_id, catchup_peer_id);
899 clib_file_t template = { 0 };
901 template.read_function = catchup_client_read_ready;
902 template.write_function = catchup_client_write_ready;
903 template.error_function = catchup_socket_error_ready;
904 template.file_descriptor = c->socket;
905 template.private_data = (uword) msm;
906 c->clib_file_index = clib_file_add (um, &template);
908 hash_set (msm->catchup_index_by_file_descriptor, c->socket,
/* Queue the request: it names our own catchup peer id and the wanted
   stream, and is byte-swapped before it is sent. */
913 mc_msg_catchup_request_t *mp;
914 mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */
916 mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
917 mp->stream_index = stream_index;
918 mc_byte_swap_msg_catchup_request (mp);
921 return c - msm->catchups;
/* Transport catchup_send_fun hook (installed in mc_socket_main_init()):
   `opaque` is the index of a catchup session in msm->catchups, and `data`
   becomes that session's output vector with no extra bytes appended. */
925 catchup_send_fun (void *transport_main, uword opaque, u8 * data)
927 mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
928 mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
929 catchup_add_pending_output (c, 0, data);
933 find_interface_ip4_address (char *if_name, u32 * ip4_address, u32 * mtu)
937 struct sockaddr_in *sa;
939 /* Dig up our IP address */
940 fd = socket (PF_INET, AF_INET, 0);
943 clib_unix_error ("socket");
947 ifr.ifr_addr.sa_family = AF_INET;
948 strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
949 if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
951 clib_unix_error ("ioctl(SIOCFIGADDR)");
956 sa = (void *) &ifr.ifr_addr;
957 clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
959 if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
965 *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);
/* Initialise the socket multicast transport: pick the multicast group and
   the local interface, size the receive path from the interface MTU, create
   the sockets, fill in the mc transport hooks and start the mc main.  The
   interface is either msm->multicast_interface_name or the first entry of
   intfc_probe_list whose address lookup succeeds. */
973 mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
974 int n_intfcs_to_probe)
/* Default multicast group when none is configured: 0xefff0007
   (239.255.0.7). */
983 if (!msm->multicast_tx_ip4_address_host_byte_order)
984 msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
/* A configured interface name is tried before the probe list. */
990 if (msm->multicast_interface_name)
993 !find_interface_ip4_address (msm->multicast_interface_name, &a,
/* find_interface_ip4_address() returns 0 on success, so the first probe
   entry that resolves is recorded as the interface name. */
998 for (i = 0; i < n_intfcs_to_probe; i++)
999 if (!find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
1002 msm->multicast_interface_name = intfc_probe_list[i];
1008 return clib_error_return (0, "can't find interface ip4 address");
1010 msm->if_ip4_address_net_byte_order = a;
/* Number of default-sized buffers needed to hold one MTU, rounded up. */
1013 msm->rx_mtu_n_bytes = mtu;
1014 msm->rx_mtu_n_buffers =
1015 msm->rx_mtu_n_bytes / vlib_buffer_get_default_data_size (vm);
1016 msm->rx_mtu_n_buffers +=
1017 (msm->rx_mtu_n_bytes % vlib_buffer_get_default_data_size (vm)) != 0;
1019 error = socket_setup (msm);
/* Our peer ids combine the interface address with a locally bound port. */
1023 mcm->transport.our_ack_peer_id =
1024 mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
1027 mcm->transport.our_catchup_peer_id =
1028 mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
1029 msm->catchup_tcp_port);
/* Hook this file's implementations into the generic mc transport. */
1031 mcm->transport.tx_buffer = tx_buffer;
1032 mcm->transport.tx_ack = tx_ack;
1033 mcm->transport.catchup_request_fun = catchup_request_fun;
1034 mcm->transport.catchup_send_fun = catchup_send_fun;
1035 mcm->transport.format_peer_id = format_socket_peer_id;
1036 mcm->transport.opaque = msm;
1037 mcm->transport.max_packet_size = mtu;
1039 mc_main_init (mcm, "socket");
1045 * fd.io coding-style-patch-verification: ON
1048 * eval: (c-set-style "gnu")