2 *------------------------------------------------------------------
3 * Copyright (c) 2020 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
// maxEpollEvents is the length of the event array handed to EpollWait by
// StartPolling, i.e. the most events one wait can return.
30 const maxEpollEvents = 1
// maxControlLen is the size in bytes of a control channel's buffer for
// ancillary (out-of-band) data received with Recvmsg.
31 const maxControlLen = 256
// errorFdNotFound is the error text returned by Socket.handleEvent when
// neither the listener nor any control channel owns the event's fd.
33 const errorFdNotFound = "fd not found"
35 // controlMsg represents a message used in communication between memif peers
// Values of this type are what controlChannel.msgQueue holds; the msgEnq*
// methods append to that queue.
36 type controlMsg struct {
41 // listener represents a listener functionality of UNIX domain socket
42 type listener struct {
// event is the epoll registration of the listening socket; event.Fd is the
// descriptor passed to Accept and closed by listener.close
44 event syscall.EpollEvent
47 // controlChannel represents a communication channel between memif peers
48 // backed by UNIX domain socket
49 type controlChannel struct {
// event is the epoll registration of the channel; event.Fd is the
// descriptor messages are received on
53 event syscall.EpollEvent
// control receives the ancillary data of Recvmsg and is later decoded by
// parseControlMsg
55 control [maxControlLen]byte
61 // Socket represents a UNIX domain socket used for communication
62 // between memif peers
// interfaceList holds *Interface elements (see Delete and parseInit)
67 interfaceList *list.List
// wakeEvent is registered on the epoll instance by NewSocket; StopPolling
// writes to its fd to wake a blocked EpollWait
70 wakeEvent syscall.EpollEvent
// stopPollChan is created by StartPolling and closed by StopPolling to
// tell the polling loop to exit
71 stopPollChan chan struct{}
75 // StopPolling stops polling events on the socket
// Nothing is done when stopPollChan is nil. Otherwise the channel is
// closed and the wake fd is written to so the poller notices.
// NOTE(review): nothing visible here resets stopPollChan; if it stays
// non-nil, a second call would panic on close of a closed channel - confirm.
76 func (socket *Socket) StopPolling() error {
77 if socket.stopPollChan != nil {
// the polling loop selects on this channel
79 close(socket.stopPollChan)
// 8-byte buffer whose first byte becomes 0x01 (uvarint encoding of 1)
// NOTE(review): read as a 64-bit counter this equals 1 only in
// little-endian byte order - confirm the supported platforms
81 buf := make([]byte, 8)
82 binary.PutUvarint(buf, 1)
// the write makes the wake fd readable so EpollWait returns
83 n, err := syscall.Write(int(socket.wakeEvent.Fd), buf[:])
// presumably reached when the write did not cover the whole buffer
88 return fmt.Errorf("Faild to write to eventfd")
90 // wait until polling is stopped
97 // StartPolling starts polling and handling events on the socket,
98 // enabling communication between memif peers
// A fresh stopPollChan is created on every call. Errors from EpollWait and
// from handling an event are sent to errChan, so the caller must drain it.
99 func (socket *Socket) StartPolling(errChan chan<- error) {
100 socket.stopPollChan = make(chan struct{})
103 var events [maxEpollEvents]syscall.EpollEvent
104 defer socket.wg.Done()
// closed by StopPolling
108 case <-socket.stopPollChan:
// blocks without timeout; StopPolling wakes it through the wake event
111 num, err := syscall.EpollWait(socket.epfd, events[:], -1)
113 errChan <- fmt.Errorf("EpollWait: %s", err)
// index with ev so each returned event is examined, not only the first,
// should maxEpollEvents ever be raised above 1
117 for ev := 0; ev < num; ev++ {
// the wake event only exists to interrupt EpollWait
118 if events[ev].Fd == socket.wakeEvent.Fd {
121 err = socket.handleEvent(&events[ev])
123 errChan <- fmt.Errorf("handleEvent: %s", err)
131 // addEvent adds event to epoll instance associated with the socket
// The descriptor to watch is taken from event.Fd. A failing EpollCtl is
// reported with an "EpollCtl: " prefix.
132 func (socket *Socket) addEvent(event *syscall.EpollEvent) error {
133 err := syscall.EpollCtl(socket.epfd, syscall.EPOLL_CTL_ADD, int(event.Fd), event)
135 return fmt.Errorf("EpollCtl: %s", err)
140 // delEvent removes event from the epoll instance associated with the socket
// The descriptor to stop watching is taken from event.Fd. A failing
// EpollCtl is reported with an "EpollCtl: " prefix.
141 func (socket *Socket) delEvent(event *syscall.EpollEvent) error {
142 err := syscall.EpollCtl(socket.epfd, syscall.EPOLL_CTL_DEL, int(event.Fd), event)
144 return fmt.Errorf("EpollCtl: %s", err)
149 // Delete deletes the socket
// It closes the control channels with reason "Socket deleted", walks the
// interface list, closes the listener when one is set, removes
// socket.filename, unregisters the wake event and closes the epoll fd.
150 func (socket *Socket) Delete() (err error) {
// cc.close unlinks the channel's element from ccList, and a removed
// element's Next() is nil; fetch the successor first so the walk does not
// stop after the first channel
151 for elt, next := socket.ccList.Front(), (*list.Element)(nil); elt != nil; elt = next {
next = elt.Next()
152 cc, ok := elt.Value.(*controlChannel)
154 err = cc.close(true, "Socket deleted")
// same successor-first walk, in case handling an interface unlinks it
160 for elt, next := socket.interfaceList.Front(), (*list.Element)(nil); elt != nil; elt = next {
next = elt.Next()
161 i, ok := elt.Value.(*Interface)
170 if socket.listener != nil {
171 err = socket.listener.close()
175 err = os.Remove(socket.filename)
181 err = socket.delEvent(&socket.wakeEvent)
183 return fmt.Errorf("Failed to delete event: %s", err)
186 syscall.Close(socket.epfd)
191 // NewSocket returns a new Socket
// An empty filename is replaced by DefaultSocketFilename. The socket gets
// its own epoll instance, on which a wake event (fd obtained from eventFd)
// is registered for EPOLLIN, EPOLLERR and EPOLLHUP.
// A nil socket and an error are returned when setting up epoll fails.
192 func NewSocket(appName string, filename string) (socket *Socket, err error) {
196 interfaceList: list.New(),
199 if socket.filename == "" {
200 socket.filename = DefaultSocketFilename
// every later epoll call depends on epfd, so do not discard this error
203 socket.epfd, err = syscall.EpollCreate1(0)
if err != nil {
return nil, fmt.Errorf("EpollCreate1: %s", err)
}
205 efd, err := eventFd()
// check now: err is reassigned by addEvent below
if err != nil {
syscall.Close(socket.epfd)
return nil, fmt.Errorf("eventFd: %s", err)
}
206 socket.wakeEvent = syscall.EpollEvent{
207 Events: syscall.EPOLLIN | syscall.EPOLLERR | syscall.EPOLLHUP,
210 err = socket.addEvent(&socket.wakeEvent)
212 return nil, fmt.Errorf("Failed to add event: %s", err)
218 // handleEvent handles epoll event
// The event is routed by descriptor: to the listener when its fd matches,
// otherwise to the first control channel in ccList with that fd. When
// nobody owns the fd an error with the errorFdNotFound text is returned.
219 func (socket *Socket) handleEvent(event *syscall.EpollEvent) error {
220 if socket.listener != nil && socket.listener.event.Fd == event.Fd {
221 return socket.listener.handleEvent(event)
224 for elt := socket.ccList.Front(); elt != nil; elt = elt.Next() {
225 cc, ok := elt.Value.(*controlChannel)
227 if cc.event.Fd == event.Fd {
// returning here means the list is not walked further after dispatch
228 return cc.handleEvent(event)
233 return fmt.Errorf(errorFdNotFound)
236 // handleEvent handles epoll event for listener
// EPOLLHUP and EPOLLERR are reported as errors naming the socket file. On
// EPOLLIN the pending connection is accepted, wrapped in a control channel
// without an interface, and a hello message is enqueued on it. Any other
// event mask is reported as unexpected.
237 func (l *listener) handleEvent(event *syscall.EpollEvent) error {
239 if (event.Events & syscall.EPOLLHUP) == syscall.EPOLLHUP {
242 return fmt.Errorf("Failed to close listener after hang up event: %s", err)
244 return fmt.Errorf("Hang up: %s", l.socket.filename)
248 if (event.Events & syscall.EPOLLERR) == syscall.EPOLLERR {
251 return fmt.Errorf("Failed to close listener after receiving an error event: %s", err)
253 return fmt.Errorf("Received error event on listener %s", l.socket.filename)
257 if (event.Events & syscall.EPOLLIN) == syscall.EPOLLIN {
258 newFd, _, err := syscall.Accept(int(l.event.Fd))
260 return fmt.Errorf("Accept: %s", err)
263 cc, err := l.socket.addControlChannel(newFd, nil)
// failure path: no control channel owns newFd, release it before returning
syscall.Close(newFd)
265 return fmt.Errorf("Failed to add control channel: %s", err)
268 err = cc.msgEnqHello()
270 return fmt.Errorf("msgEnqHello: %s", err)
// Events is a bit mask, so print it in hex
281 return fmt.Errorf("Unexpected event: %#x", event.Events)
284 // handleEvent handles epoll event for control channel
// On EPOLLHUP or EPOLLERR the channel is closed without sending a
// disconnect message and an error naming the interface is returned. On
// EPOLLIN one message is received into cc.data, with ancillary data in
// cc.control and its length in cc.controlLen. Any other event mask is
// reported as unexpected.
// NOTE(review): channels accepted by the listener are created with a nil
// interface argument; confirm cc.i cannot be nil where GetName is called.
285 func (cc *controlChannel) handleEvent(event *syscall.EpollEvent) error {
290 if (event.Events & syscall.EPOLLHUP) == syscall.EPOLLHUP {
291 // close cc, don't send msg
292 err := cc.close(false, "")
294 return fmt.Errorf("Failed to close control channel after hang up event: %s", err)
296 return fmt.Errorf("Hang up: %v", cc.i.GetName())
299 if (event.Events & syscall.EPOLLERR) == syscall.EPOLLERR {
300 // close cc, don't send msg
301 err := cc.close(false, "")
303 return fmt.Errorf("Failed to close control channel after receiving an error event: %s", err)
305 return fmt.Errorf("Received error event on control channel %v", cc.i.GetName())
308 if (event.Events & syscall.EPOLLIN) == syscall.EPOLLIN {
309 size, cc.controlLen, _, _, err = syscall.Recvmsg(int(cc.event.Fd), cc.data[:], cc.control[:], 0)
311 return fmt.Errorf("recvmsg: %s", err)
314 return fmt.Errorf("invalid message size %d", size)
// Events is a bit mask, so print it in hex
330 return fmt.Errorf("Unexpected event: %#x", event.Events)
333 // close closes the listener
// The listening fd is first removed from the socket's epoll instance and
// then closed; either failure is returned with its cause.
334 func (l *listener) close() error {
335 err := l.socket.delEvent(&l.event)
337 return fmt.Errorf("Failed to del event: %s", err)
339 err = syscall.Close(int(l.event.Fd))
341 return fmt.Errorf("Failed to close socket: %s", err)
346 // AddListener adds a listener to the socket. The fd must describe a
347 // UNIX domain socket already bound to a UNIX domain filename and
348 // marked as listener
// The fd is registered on the socket's epoll instance for EPOLLIN,
// EPOLLERR and EPOLLHUP; a registration failure is returned with its cause.
349 func (socket *Socket) AddListener(fd int) (err error) {
351 // we will need this to look up master interface by id
355 l.event = syscall.EpollEvent{
356 Events: syscall.EPOLLIN | syscall.EPOLLERR | syscall.EPOLLHUP,
359 err = socket.addEvent(&l.event)
361 return fmt.Errorf("Failed to add event: %s", err)
369 // addListener creates new UNIX domain socket, binds it to the address
370 // and marks it as listener
// The socket is of type SOCK_SEQPACKET with SO_PASSCRED enabled, bound to
// socket.filename and listening with a SOMAXCONN backlog. On success the
// fd is handed to AddListener; on any failure after creation it is closed.
371 func (socket *Socket) addListener() (err error) {
373 fd, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0)
// include the cause, like the other failures below
375 return fmt.Errorf("Failed to create UNIX domain socket: %v", err)
377 usa := &syscall.SockaddrUnix{Name: socket.filename}
378 // Bind to address and start listening
379 err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_PASSCRED, 1)
// failure path: nobody else holds fd yet, so close it to avoid a leak
syscall.Close(fd)
381 return fmt.Errorf("Failed to set socket option %s : %v", socket.filename, err)
383 err = syscall.Bind(fd, usa)
// failure path: close the unbound socket
syscall.Close(fd)
385 return fmt.Errorf("Failed to bind socket %s : %v", socket.filename, err)
387 err = syscall.Listen(fd, syscall.SOMAXCONN)
// failure path: close the socket that could not be put into listening state
syscall.Close(fd)
389 return fmt.Errorf("Failed to listen on socket %s : %v", socket.filename, err)
392 return socket.AddListener(fd)
395 // close closes a control channel, if the control channel is assigned an
396 // interface, the interface is disconnected
// str is the reason placed in the disconnect message. The channel's event
// is removed from the epoll instance and the channel is unlinked from the
// socket's ccList, so callers iterating that list must not rely on the
// element afterwards.
397 func (cc *controlChannel) close(sendMsg bool, str string) (err error) {
399 // first clear message queue so that the disconnect
400 // message is the only message in queue
401 cc.msgQueue = []controlMsg{}
402 cc.msgEnqDisconnect(str)
410 err = cc.socket.delEvent(&cc.event)
412 return fmt.Errorf("Failed to del event: %s", err)
415 // remove reference from socket
416 cc.socket.ccList.Remove(cc.listRef)
419 err = cc.i.disconnect()
421 return fmt.Errorf("Interface Disconnect: %s", err)
428 // addControlChannel returns a new controlChannel and adds it to the socket
// The channel's fd is registered on the epoll instance for EPOLLIN,
// EPOLLERR and EPOLLHUP and the channel is appended to ccList, keeping the
// list element in listRef for later removal. On a registration failure nil
// and the cause are returned; fd is not closed here.
429 func (socket *Socket) addControlChannel(fd int, i *Interface) (*controlChannel, error) {
430 cc := &controlChannel{
438 cc.event = syscall.EpollEvent{
439 Events: syscall.EPOLLIN | syscall.EPOLLERR | syscall.EPOLLHUP,
442 err = socket.addEvent(&cc.event)
444 return nil, fmt.Errorf("Failed to add event: %s", err)
447 cc.listRef = socket.ccList.PushBack(cc)
// msgEnqAck encodes an ack message, which consists of the little-endian
// msgTypeAck tag only, and appends a message to the channel's queue.
// NOTE(review): whether the binary.Write error is checked is not visible
// in this excerpt - confirm.
452 func (cc *controlChannel) msgEnqAck() (err error) {
453 buf := new(bytes.Buffer)
454 err = binary.Write(buf, binary.LittleEndian, msgTypeAck)
461 cc.msgQueue = append(cc.msgQueue, msg)
// msgEnqHello encodes a hello message - the little-endian msgTypeHello tag
// followed by the hello payload, whose Name carries the socket's appName -
// and appends a message to the channel's queue.
// An encoding failure is returned and nothing is queued.
466 func (cc *controlChannel) msgEnqHello() (err error) {
476 copy(hello.Name[:], []byte(cc.socket.appName))
478 buf := new(bytes.Buffer)
479 err = binary.Write(buf, binary.LittleEndian, msgTypeHello)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
480 err = binary.Write(buf, binary.LittleEndian, hello)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
487 cc.msgQueue = append(cc.msgQueue, msg)
// parseHello decodes a hello message from cc.data (after the type tag) and
// rejects a peer whose [VersionMin, VersionMax] range does not contain
// Version. The run-time configuration starts as a copy of the configured
// MemoryConfig; NumQueuePairs and Log2RingSize are then lowered to the
// peer's advertised limits. The peer's name is stored in remoteName.
492 func (cc *controlChannel) parseHello() (err error) {
495 buf := bytes.NewReader(cc.data[msgTypeSize:])
496 err = binary.Read(buf, binary.LittleEndian, &hello)
501 if hello.VersionMin > Version || hello.VersionMax < Version {
502 return fmt.Errorf("Incompatible memif version")
505 cc.i.run = cc.i.args.MemoryConfig
507 cc.i.run.NumQueuePairs = min16(cc.i.args.MemoryConfig.NumQueuePairs, hello.MaxRingS2M)
// clamp the value computed above, otherwise the S2M limit is thrown away
// and only the M2S limit takes effect
508 cc.i.run.NumQueuePairs = min16(cc.i.run.NumQueuePairs, hello.MaxRingM2S)
509 cc.i.run.Log2RingSize = min8(cc.i.args.MemoryConfig.Log2RingSize, hello.MaxLog2RingSize)
// NOTE(review): the whole Name array is converted, so any padding bytes
// after the name end up in remoteName - confirm this is intended
511 cc.i.remoteName = string(hello.Name[:])
// msgEnqInit encodes an init message - the little-endian msgTypeInit tag
// followed by the init payload, which carries the interface's Mode and the
// socket's appName as Name - and appends a message to the channel's queue.
// An encoding failure is returned and nothing is queued.
516 func (cc *controlChannel) msgEnqInit() (err error) {
520 Mode: cc.i.args.Mode,
523 copy(init.Name[:], []byte(cc.socket.appName))
525 buf := new(bytes.Buffer)
526 err = binary.Write(buf, binary.LittleEndian, msgTypeInit)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
527 err = binary.Write(buf, binary.LittleEndian, init)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
534 cc.msgQueue = append(cc.msgQueue, msg)
// parseInit decodes an init message from cc.data (after the type tag),
// requires the peer's Version to equal ours and looks for the interface
// the peer wants to connect to. Possible errors visible here: version
// mismatch, "Invalid secret", and "Invalid interface id" when no interface
// qualifies.
539 func (cc *controlChannel) parseInit() (err error) {
542 buf := bytes.NewReader(cc.data[msgTypeSize:])
543 err = binary.Read(buf, binary.LittleEndian, &init)
548 if init.Version != Version {
549 return fmt.Errorf("Incompatible memif driver version")
552 // find peer interface
553 for elt := cc.socket.interfaceList.Front(); elt != nil; elt = elt.Next() {
554 i, ok := elt.Value.(*Interface)
// candidate: same id, configured as master, not yet bound to a channel
556 if i.args.Id == init.Id && i.args.IsMaster && i.cc == nil {
// a matching id with a different secret fails immediately instead of
// continuing the search
558 if i.args.Secret != init.Secret {
559 return fmt.Errorf("Invalid secret")
561 // interface is assigned to control channel
// run-time configuration starts as a copy of the configured one
564 cc.i.run = cc.i.args.MemoryConfig
565 cc.i.remoteName = string(init.Name[:])
572 return fmt.Errorf("Invalid interface id")
// msgEnqAddRegion encodes an add-region message for the interface's memory
// region regionIndex - the little-endian msgTypeAddRegion tag followed by
// the payload with the region's size - and appends a message carrying the
// region's fd to the channel's queue.
// An out-of-range regionIndex or an encoding failure is returned as error.
575 func (cc *controlChannel) msgEnqAddRegion(regionIndex uint16) (err error) {
576 if len(cc.i.regions) <= int(regionIndex) {
577 return fmt.Errorf("Invalid region index")
580 addRegion := MsgAddRegion{
582 Size: cc.i.regions[regionIndex].size,
585 buf := new(bytes.Buffer)
586 err = binary.Write(buf, binary.LittleEndian, msgTypeAddRegion)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
587 err = binary.Write(buf, binary.LittleEndian, addRegion)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
591 Fd: cc.i.regions[regionIndex].fd,
594 cc.msgQueue = append(cc.msgQueue, msg)
// parseAddRegion decodes an add-region message from cc.data (after the
// type tag), takes the accompanying file descriptor from the ancillary
// data via parseControlMsg and appends a new memoryRegion of the announced
// size to the interface's regions.
599 func (cc *controlChannel) parseAddRegion() (err error) {
600 var addRegion MsgAddRegion
602 buf := bytes.NewReader(cc.data[msgTypeSize:])
603 err = binary.Read(buf, binary.LittleEndian, &addRegion)
608 fd, err := cc.parseControlMsg()
610 return fmt.Errorf("parseControlMsg: %s", err)
// NOTE(review): the index is range-checked only; the region is appended,
// so its position depends on arrival order - confirm peers send regions
// in index order
613 if addRegion.Index > 255 {
614 return fmt.Errorf("Invalid memory region index")
617 region := memoryRegion{
618 size: addRegion.Size,
622 cc.i.regions = append(cc.i.regions, region)
// msgEnqAddRing encodes an add-ring message for queue ringIndex - the
// little-endian msgTypeAddRing tag followed by the payload with the ring's
// offset, region and log2 size - and appends a message to the channel's
// queue. For ringTypeS2M the queue is taken from txQueues and the S2M flag
// is set; otherwise it is taken from rxQueues.
// An encoding failure is returned and nothing is queued.
// NOTE(review): ringIndex is not range-checked against the queue slices
// in the visible code; an out-of-range index would panic - confirm callers.
627 func (cc *controlChannel) msgEnqAddRing(ringType ringType, ringIndex uint16) (err error) {
631 if ringType == ringTypeS2M {
632 q = cc.i.txQueues[ringIndex]
633 flags = msgAddRingFlagS2M
635 q = cc.i.rxQueues[ringIndex]
638 addRing := MsgAddRing{
640 Offset: uint32(q.ring.offset),
641 Region: uint16(q.ring.region),
642 RingSizeLog2: uint8(q.ring.log2Size),
647 buf := new(bytes.Buffer)
648 err = binary.Write(buf, binary.LittleEndian, msgTypeAddRing)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
649 err = binary.Write(buf, binary.LittleEndian, addRing)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
656 cc.msgQueue = append(cc.msgQueue, msg)
// parseAddRing decodes an add-ring message from cc.data (after the type
// tag), obtains the accompanying file descriptor via parseControlMsg and
// creates a queue whose ring is described by the message's region, offset
// and log2 size. A ring flagged S2M becomes an rx queue of this interface,
// any other ring a tx queue.
661 func (cc *controlChannel) parseAddRing() (err error) {
662 var addRing MsgAddRing
664 buf := bytes.NewReader(cc.data[msgTypeSize:])
665 err = binary.Read(buf, binary.LittleEndian, &addRing)
670 fd, err := cc.parseControlMsg()
// the index must lie below the negotiated number of queue pairs
675 if addRing.Index >= cc.i.run.NumQueuePairs {
676 return fmt.Errorf("invalid ring index")
684 if (addRing.Flags & msgAddRingFlagS2M) == msgAddRingFlagS2M {
685 q.ring = newRing(int(addRing.Region), ringTypeS2M, int(addRing.Offset), int(addRing.RingSizeLog2))
// queues are appended, so their position follows arrival order
686 cc.i.rxQueues = append(cc.i.rxQueues, q)
688 q.ring = newRing(int(addRing.Region), ringTypeM2S, int(addRing.Offset), int(addRing.RingSizeLog2))
689 cc.i.txQueues = append(cc.i.txQueues, q)
// msgEnqConnect encodes a connect message - the little-endian
// msgTypeConnect tag followed by the payload, whose Name carries the
// interface's configured name - and appends a message to the channel's
// queue. An encoding failure is returned and nothing is queued.
695 func (cc *controlChannel) msgEnqConnect() (err error) {
696 var connect MsgConnect
697 copy(connect.Name[:], []byte(cc.i.args.Name))
699 buf := new(bytes.Buffer)
700 err = binary.Write(buf, binary.LittleEndian, msgTypeConnect)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
701 err = binary.Write(buf, binary.LittleEndian, connect)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
708 cc.msgQueue = append(cc.msgQueue, msg)
// parseConnect decodes a connect message from cc.data (after the type
// tag), records the peer's interface name and marks the channel connected.
713 func (cc *controlChannel) parseConnect() (err error) {
714 var connect MsgConnect
716 buf := bytes.NewReader(cc.data[msgTypeSize:])
717 err = binary.Read(buf, binary.LittleEndian, &connect)
// the whole Name array is converted, including any bytes after the name
722 cc.i.peerName = string(connect.Name[:])
729 cc.isConnected = true
// msgEnqConnected encodes a connected message - the little-endian
// msgTypeConnected tag followed by the payload, whose Name carries the
// interface's configured name - and appends a message to the channel's
// queue. An encoding failure is returned and nothing is queued.
734 func (cc *controlChannel) msgEnqConnected() (err error) {
735 var connected MsgConnected
736 copy(connected.Name[:], []byte(cc.i.args.Name))
738 buf := new(bytes.Buffer)
739 err = binary.Write(buf, binary.LittleEndian, msgTypeConnected)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
740 err = binary.Write(buf, binary.LittleEndian, connected)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
747 cc.msgQueue = append(cc.msgQueue, msg)
// parseConnected decodes a connected message from cc.data (after the type
// tag), records the peer's interface name and marks the channel connected.
752 func (cc *controlChannel) parseConnected() (err error) {
753 var conn MsgConnected
755 buf := bytes.NewReader(cc.data[msgTypeSize:])
756 err = binary.Read(buf, binary.LittleEndian, &conn)
// the whole Name array is converted, including any bytes after the name
761 cc.i.peerName = string(conn.Name[:])
768 cc.isConnected = true
// msgEnqDisconnect encodes a disconnect message - the little-endian
// msgTypeDisconnect tag followed by the payload, whose String field holds
// the reason str (cut to the field's length by copy) - and appends a
// message to the channel's queue.
// An encoding failure is returned and nothing is queued.
773 func (cc *controlChannel) msgEnqDisconnect(str string) (err error) {
778 copy(dc.String[:], str)
780 buf := new(bytes.Buffer)
781 err = binary.Write(buf, binary.LittleEndian, msgTypeDisconnect)
// the next Write reassigns err, so check the tag write first
if err != nil {
return err
}
782 err = binary.Write(buf, binary.LittleEndian, dc)
// do not queue a message whose payload failed to encode
if err != nil {
return err
}
789 cc.msgQueue = append(cc.msgQueue, msg)
// parseDisconnect decodes a disconnect message from cc.data (after the
// type tag) and closes the channel without replying, passing the peer's
// reason string on to close. A failure to close is returned with its cause.
794 func (cc *controlChannel) parseDisconnect() (err error) {
797 buf := bytes.NewReader(cc.data[msgTypeSize:])
798 err = binary.Read(buf, binary.LittleEndian, &dc)
// sendMsg is false: the peer initiated the disconnect
803 err = cc.close(false, string(dc.String[:]))
805 return fmt.Errorf("Failed to disconnect control channel: %s", err)
// parseMsg reads the message type from the start of cc.data and dispatches
// to the matching parse* method; an unrecognised type yields an
// "unknown message" error.
// A hello drives the slave-side handshake: regions and queues are
// initialised, then init, one add-region per region, one add-ring per S2M
// and per M2S queue pair, and connect are enqueued. A connect is answered
// by enqueueing connected.
// When processing fails the channel is closed with the error text as the
// disconnect reason; if that close fails too, both errors are reported.
811 func (cc *controlChannel) parseMsg() error {
815 buf := bytes.NewReader(cc.data[:])
816 err = binary.Read(buf, binary.LittleEndian, &msgType)
818 if msgType == msgTypeAck {
820 } else if msgType == msgTypeHello {
822 err = cc.parseHello()
826 // Initialize slave memif
827 err = cc.i.initializeRegions()
831 err = cc.i.initializeQueues()
836 err = cc.msgEnqInit()
840 for i := 0; i < len(cc.i.regions); i++ {
841 err = cc.msgEnqAddRegion(uint16(i))
846 for i := 0; uint16(i) < cc.i.run.NumQueuePairs; i++ {
847 err = cc.msgEnqAddRing(ringTypeS2M, uint16(i))
852 for i := 0; uint16(i) < cc.i.run.NumQueuePairs; i++ {
853 err = cc.msgEnqAddRing(ringTypeM2S, uint16(i))
858 err = cc.msgEnqConnect()
862 } else if msgType == msgTypeInit {
872 } else if msgType == msgTypeAddRegion {
873 err = cc.parseAddRegion()
882 } else if msgType == msgTypeAddRing {
883 err = cc.parseAddRing()
892 } else if msgType == msgTypeConnect {
893 err = cc.parseConnect()
898 err = cc.msgEnqConnected()
902 } else if msgType == msgTypeConnected {
903 err = cc.parseConnected()
907 } else if msgType == msgTypeDisconnect {
908 err = cc.parseDisconnect()
913 err = fmt.Errorf("unknown message %d", msgType)
920 err1 := cc.close(true, err.Error())
// use a fixed format string: the error text must not be interpreted as a
// format, and both errors need a verb to be printed
922 return fmt.Errorf("%s: Failed to close control channel: %s", err, err1)
928 // parseControlMsg parses control message and returns file descriptor
// The ancillary data received by the last Recvmsg (cc.control up to
// cc.controlLen) is decoded and searched for a SOL_SOCKET / SCM_RIGHTS
// message. On every error path visible here the returned fd is -1.
930 func (cc *controlChannel) parseControlMsg() (fd int, err error) {
931 // Assert only called when we require FD
934 controlMsgs, err := syscall.ParseSocketControlMessage(cc.control[:cc.controlLen])
936 return -1, fmt.Errorf("syscall.ParseSocketControlMessage: %s", err)
939 if len(controlMsgs) == 0 {
940 return -1, fmt.Errorf("Missing control message")
943 for _, cmsg := range controlMsgs {
944 if cmsg.Header.Level == syscall.SOL_SOCKET {
945 if cmsg.Header.Type == syscall.SCM_RIGHTS {
// SCM_RIGHTS carries the descriptors passed by the peer
946 FDs, err := syscall.ParseUnixRights(&cmsg)
948 return -1, fmt.Errorf("syscall.ParseUnixRights: %s", err)
953 // Only expect single FD
// reached when no control message yielded a descriptor
960 return -1, fmt.Errorf("Missing file descriptor")