Skip to content

Support sendmsg() and recvmsg() for tcp wrapper#2805

Merged
stevenengler merged 1 commit into shadow:main from
stevenengler:readv-writev
Mar 27, 2023
Merged

Support sendmsg() and recvmsg() for tcp wrapper#2805
stevenengler merged 1 commit into shadow:main from
stevenengler:readv-writev

Conversation

@stevenengler
Copy link
Copy Markdown
Contributor

@stevenengler stevenengler commented Mar 24, 2023

In this new version we no longer cap the send/recv buffer size at SYSCALL_IO_BUFSIZE (10 MiB). I don't think that cap is needed with the current memory manager.

I duplicated some packet-related C functions to support passing the memory manager through, but this should hopefully be cleaned up once UDP sockets are migrated to Rust.

The logic generally follows the logic from src/main/host/syscall/socket.c.

/*
 * Receive up to `bufSize` bytes from socket `sockfd` into the plugin buffer
 * `bufPtr`, optionally reporting the sender's address.
 *
 * Parameters:
 *   sys         - syscall handler servicing the call.
 *   sockfd      - descriptor number of the socket to read from.
 *   bufPtr      - plugin-memory destination buffer.
 *   bufSize     - capacity of `bufPtr` in bytes.
 *   flags       - recv flags; only MSG_DONTWAIT is acted on, anything else
 *                 just triggers a warning.
 *   srcAddrPtr  - if non-NULL, plugin pointer that receives the source
 *                 address (UDP sockets only).
 *   addrlenPtr  - plugin pointer to the address length associated with
 *                 `srcAddrPtr`.
 *
 * Returns:
 *   - a "done" result carrying the value produced by
 *     legacysocket_receiveUserData() (or -EWOULDBLOCK in non-blocking mode),
 *   - a "blocked" result waiting for STATUS_FILE_READABLE when the call would
 *     block and the socket is in blocking mode,
 *   - a "done errno" result for validation failures, ENOTCONN on a TCP socket
 *     that was never connected, EFAULT if the address length can't be written,
 *     or the error returned by _syscallhandler_getnameHelper().
 */
SysCallReturn _syscallhandler_recvfromHelper(SysCallHandler* sys, int sockfd,
                                             PluginPtr bufPtr, size_t bufSize,
                                             int flags, PluginPtr srcAddrPtr,
                                             PluginPtr addrlenPtr) {
    trace("trying to recv %zu bytes on socket %i", bufSize, sockfd);

    /* Get and validate the socket. */
    LegacySocket* socket_desc = NULL;
    int errcode = _syscallhandler_validateSocketHelper(sys, sockfd, &socket_desc);

    LegacyFile* desc = (LegacyFile*)socket_desc;
    /* A failed validation is tolerated when the descriptor is still reported
     * as readable-when-closed. */
    if (errcode < 0 && _syscallhandler_readableWhenClosed(sys, desc)) {
        errcode = 0;
    }

    if (errcode < 0) {
        return syscallreturn_makeDoneErrno(-errcode);
    }

    if (flags & ~MSG_DONTWAIT) {
        warning("Unsupported recv flag(s): %d", flags);
    }

    ssize_t retval = 0;

    if (legacyfile_getType(desc) == DT_TCPSOCKET) {
        /* Use a distinct name so this does not shadow the function-level
         * `errcode` used for validation and address reporting. */
        int connErr = tcp_getConnectionError((TCP*)socket_desc);

        if (connErr > 0) {
            /* connect() was not called yet. */
            return syscallreturn_makeDoneErrno(ENOTCONN);
        } else if (connErr == -EALREADY) {
            /* Connection in progress. */
            retval = -EWOULDBLOCK;
        }
    }

    struct sockaddr_in inet_addr = {.sin_family = AF_INET};

    /* Only attempt the read if the connection-state check above did not
     * already decide the outcome. */
    if (retval == 0) {
        size_t sizeNeeded = bufSize;

        if (legacyfile_getType(desc) == DT_TCPSOCKET) {
            // we can only truncate the data if it is a TCP connection
            /* TODO: Dynamically compute size based on how much data is actually
             * available in the descriptor. */
            sizeNeeded = MIN(sizeNeeded, SYSCALL_IO_BUFSIZE);
        } else if (legacyfile_getType(desc) == DT_UDPSOCKET) {
            // allow it to be 1 byte longer than the max datagram size
            sizeNeeded = MIN(sizeNeeded, CONFIG_DATAGRAM_MAX_SIZE + 1);
        }

        retval = legacysocket_receiveUserData(socket_desc, _syscallhandler_getThread(sys), bufPtr,
                                              sizeNeeded, &inet_addr.sin_addr.s_addr,
                                              &inet_addr.sin_port);

        trace("recv returned %zd", retval);
    }

    /* Non-blocking if either the file status flag or the per-call flag says so. */
    bool nonblocking_mode =
        (legacyfile_getFlags(desc) & O_NONBLOCK) || (flags & MSG_DONTWAIT);

    if (retval == -EWOULDBLOCK && !nonblocking_mode) {
        trace("recv would block on socket %i", sockfd);
        /* We need to block until the descriptor is ready to read. */
        Trigger trigger =
            (Trigger){.type = TRIGGER_DESCRIPTOR, .object = desc, .status = STATUS_FILE_READABLE};
        return syscallreturn_makeBlocked(
            syscallcondition_new(trigger), legacyfile_supportsSaRestart(desc));
    }

    /* check if they wanted to know where we got the data from */
    /* NOTE(review): the `retval > 0` guard means no address (and no address
     * length) is written when 0 bytes are returned. Linux reports the source
     * address for zero-length datagrams too; confirm whether the legacy
     * sockets can return 0 for such a datagram and whether this should be
     * `retval >= 0`. */
    if (retval > 0 && srcAddrPtr.val) {
        trace("address info is requested in recv on socket %i", sockfd);

        /* only write an address for UDP sockets */
        if (legacyfile_getType(desc) == DT_UDPSOCKET) {
            errcode = _syscallhandler_getnameHelper(
                sys, (struct sockaddr*)&inet_addr, sizeof(inet_addr), srcAddrPtr, addrlenPtr);

            if (errcode) {
                return syscallreturn_makeDoneErrno(-errcode);
            }
        } else {
            /* set the address length as 0 */
            socklen_t addrlen = 0;
            if (process_writePtr(
                    _syscallhandler_getProcess(sys), addrlenPtr, &addrlen, sizeof(addrlen)) != 0) {
                return syscallreturn_makeDoneErrno(EFAULT);
            }
        }
    }

    return syscallreturn_makeDoneI64(retval);
}
/*
 * Send up to `bufSize` bytes from the plugin buffer `bufPtr` on socket
 * `sockfd`, optionally to the explicit destination in `destAddrPtr`.
 *
 * Parameters:
 *   sys          - syscall handler servicing the call.
 *   sockfd       - descriptor number of the socket to write to.
 *   bufPtr       - plugin-memory source buffer; must be non-NULL.
 *   bufSize      - number of bytes requested to send.
 *   flags        - send flags; only MSG_DONTWAIT is acted on, anything else
 *                  just triggers a warning.
 *   destAddrPtr  - if non-NULL, plugin pointer to the destination address
 *                  (must be an AF_INET `struct sockaddr_in`).
 *   addrlen      - length of the address at `destAddrPtr`.
 *
 * Returns:
 *   - a "done" result carrying the value produced by
 *     legacysocket_sendUserData(), a negative TCP connection error, or 0 for
 *     a zero-byte send that would otherwise block,
 *   - a "blocked" result waiting for STATUS_FILE_WRITABLE when the call would
 *     block, the socket is in blocking mode and `bufSize > 0`,
 *   - a "done errno" result for: socket validation failures; EFAULT (NULL
 *     buffer or unreadable destination address); EINVAL (address too short);
 *     EAFNOSUPPORT (non-AF_INET address); EDESTADDRREQ (UDP with no
 *     destination); EADDRNOTAVAIL (no free port for the implicit UDP bind);
 *     EPIPE (TCP socket that was never connected).
 */
SysCallReturn _syscallhandler_sendtoHelper(SysCallHandler* sys, int sockfd,
                                           PluginPtr bufPtr, size_t bufSize,
                                           int flags, PluginPtr destAddrPtr,
                                           socklen_t addrlen) {
    trace("trying to send %zu bytes on socket %i", bufSize, sockfd);

    /* Get and validate the socket. */
    LegacySocket* socket_desc = NULL;
    int errcode = _syscallhandler_validateSocketHelper(sys, sockfd, &socket_desc);

    if (errcode < 0) {
        return syscallreturn_makeDoneErrno(-errcode);
    }

    /* Need non-NULL buffer. */
    /* FIXME: should push this check to the point the data is actually read,
     * to correctly handle non-NULL pointers that aren't accessible.
     * This is currently in the Payload code; need to bubble up errors from there.
     */
    if (!bufPtr.val) {
        debug("Can't send from NULL buffer on socket %i", sockfd);
        return syscallreturn_makeDoneErrno(EFAULT);
    }

    /* TODO: when we support AF_UNIX this could be sockaddr_un */
    size_t inet_len = sizeof(struct sockaddr_in);
    if (destAddrPtr.val && addrlen < inet_len) {
        debug("Address length %ld is too small on socket %i", (long int)addrlen, sockfd);
        return syscallreturn_makeDoneErrno(EINVAL);
    }

    if (flags & ~MSG_DONTWAIT) {
        warning("Unsupported send flag(s): %d", flags);
    }

    /* Get the address info if they specified one. */
    in_addr_t dest_ip = 0;
    in_port_t dest_port = 0;

    if (destAddrPtr.val) {
        const struct sockaddr* dest_addr =
            process_getReadablePtr(_syscallhandler_getProcess(sys), destAddrPtr, addrlen);

        /* The pointer comes from the plugin, so treat a failed read as a bad
         * address instead of relying on a debug-only assertion before the
         * dereference below. */
        if (!dest_addr) {
            debug("Can't read destination address on socket %i", sockfd);
            return syscallreturn_makeDoneErrno(EFAULT);
        }

        /* TODO: we assume AF_INET here, change this when we support AF_UNIX */
        if (dest_addr->sa_family != AF_INET) {
            warning(
                "We only support address family AF_INET on socket %i", sockfd);
            return syscallreturn_makeDoneErrno(EAFNOSUPPORT);
        }

        /* Keep the const qualifier: the memory is only borrowed for reading. */
        const struct sockaddr_in* dest_inet = (const struct sockaddr_in*)dest_addr;
        dest_ip = dest_inet->sin_addr.s_addr;
        dest_port = dest_inet->sin_port;
    }

    LegacyFile* desc = (LegacyFile*)socket_desc;
    errcode = 0;

    if (legacyfile_getType(desc) == DT_UDPSOCKET) {
        /* make sure that we have somewhere to send it */
        if (dest_ip == 0 || dest_port == 0) {
            /* its ok if they setup a default destination with connect() */
            legacysocket_getPeerName(socket_desc, &dest_ip, &dest_port);
            if (dest_ip == 0 || dest_port == 0) {
                /* we have nowhere to send it */
                return syscallreturn_makeDoneErrno(EDESTADDRREQ);
            }
        }

        /* if this socket is not bound, do an implicit bind to a random port */
        if (!legacysocket_isBound(socket_desc)) {
            ProtocolType ptype = legacysocket_getProtocol(socket_desc);

            /* We don't bind to peer ip/port since that might change later. */
            in_addr_t bindAddr =
                (dest_ip == htonl(INADDR_LOOPBACK))
                    ? htonl(INADDR_LOOPBACK)
                    : address_toNetworkIP(host_getDefaultAddress(_syscallhandler_getHost(sys)));
            in_port_t bindPort =
                host_getRandomFreePort(_syscallhandler_getHost(sys), ptype, bindAddr, 0, 0);

            if (!bindPort) {
                return syscallreturn_makeDoneErrno(EADDRNOTAVAIL);
            }

            /* connect up socket layer */
            legacysocket_setPeerName(socket_desc, 0, 0);
            legacysocket_setSocketName(socket_desc, bindAddr, bindPort);

            /* set netiface->socket associations */
            CompatSocket compat_socket = compatsocket_fromLegacySocket(socket_desc);
            host_associateInterface(
                _syscallhandler_getHost(sys), &compat_socket, ptype, bindAddr, bindPort, 0, 0);
        }
    } else if (legacyfile_getType(desc) == DT_TCPSOCKET) {
        errcode = tcp_getConnectionError((TCP*)socket_desc);

        trace("connection error state is currently %i", errcode);

        if (errcode > 0) {
            /* connect() was not called yet.
             * TODO: Can they can piggy back a connect() on sendto() if they
             * provide an address for the connection? */
            return syscallreturn_makeDoneErrno(EPIPE);
        } else if (errcode == 0) {
            /* They connected, but never read the success code with a second
             * call to connect(). That's OK, proceed to send as usual. */
        } else if (errcode == -EISCONN) {
            /* They are connected, and we can send now. */
            errcode = 0;
        } else if (errcode == -EALREADY) {
            /* Connection in progress.
             * TODO: should we wait, or just return -EALREADY? */
            errcode = -EWOULDBLOCK;
        }
        /* Any other negative value falls through and is returned as-is. */
    }

    /* ssize_t matches the "%zd" trace format below and the type used by the
     * recv helper. */
    ssize_t retval = (ssize_t)errcode;

    if (errcode == 0) {
        size_t sizeNeeded = bufSize;

        if (legacyfile_getType(desc) == DT_TCPSOCKET) {
            // we can only truncate the data if it is a TCP connection
            /* TODO: Dynamically compute size based on how much data is actually
             * available in the descriptor. */
            sizeNeeded = MIN(sizeNeeded, SYSCALL_IO_BUFSIZE);
        } else if (legacyfile_getType(desc) == DT_UDPSOCKET) {
            // allow it to be 1 byte longer than the max so that we can receive EMSGSIZE
            sizeNeeded = MIN(sizeNeeded, CONFIG_DATAGRAM_MAX_SIZE + 1);
        }

        retval = legacysocket_sendUserData(
            socket_desc, _syscallhandler_getThread(sys), bufPtr, sizeNeeded, dest_ip, dest_port);

        trace("send returned %zd", retval);
    }

    /* Non-blocking if either the file status flag or the per-call flag says so. */
    bool nonblocking_mode =
        (legacyfile_getFlags(desc) & O_NONBLOCK) || (flags & MSG_DONTWAIT);

    if (retval == -EWOULDBLOCK && !nonblocking_mode) {
        if (bufSize > 0) {
            /* We need to block until the descriptor is ready to write. */
            Trigger trigger = (Trigger){
                .type = TRIGGER_DESCRIPTOR, .object = desc, .status = STATUS_FILE_WRITABLE};
            return syscallreturn_makeBlocked(
                syscallcondition_new(trigger), legacyfile_supportsSaRestart(desc));
        } else {
            /* We attempted to write 0 bytes, so no need to block or return EWOULDBLOCK. */
            retval = 0;
        }
    }

    return syscallreturn_makeDoneI64(retval);
}

@github-actions github-actions bot added the Component: Main Composing the core Shadow executable label Mar 24, 2023
@stevenengler stevenengler requested a review from sporksmith March 24, 2023 19:46
@stevenengler stevenengler force-pushed the readv-writev branch 2 times, most recently from c3be597 to a484fdf Compare March 27, 2023 20:01
We don't limit the send/recv buffer size using `SYSCALL_IO_BUFSIZE` in this new
version.
@stevenengler stevenengler enabled auto-merge March 27, 2023 20:44
@stevenengler stevenengler merged commit bb7e604 into shadow:main Mar 27, 2023
@stevenengler stevenengler deleted the readv-writev branch March 27, 2023 23:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Component: Main Composing the core Shadow executable

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants