linux(4): Rework Linux ppoll system call.

Currently the Linux emulation layer calls the in-kernel ppoll(2) without
converting the user-supplied fd 'events', and does not convert the
kernel-supplied fd 'revents' either.

At least POLLRDHUP is handled differently by FreeBSD than by Linux.
It seems that Linux silently ignores POLLRDHUP on non-socket fds,
unlike FreeBSD, which checks the events more strictly and fails.

Rework the Linux ppoll, using kern_poll and converting 'events'
and 'revents' values.
While here, move the poll event defines to the MI part of the code, as
they are mostly identical on all arches except arm.

Differential Revision:	https://reviews.freebsd.org/D30716
MFC after:		2 weeks
This commit is contained in:
Dmitry Chagin 2021-06-22 08:06:05 +03:00
parent 224c772ca5
commit 26795a0378
7 changed files with 184 additions and 64 deletions

View File

@ -407,27 +407,6 @@ struct l_ifconf {
#define ifc_buf ifc_ifcu.ifcu_buf
#define ifc_req ifc_ifcu.ifcu_req
/*
 * poll(): Linux event bits and the pollfd entry declared with the
 * l_* types.
 */
#define LINUX_POLLIN 0x0001
#define LINUX_POLLPRI 0x0002
#define LINUX_POLLOUT 0x0004
#define LINUX_POLLERR 0x0008
#define LINUX_POLLHUP 0x0010
#define LINUX_POLLNVAL 0x0020
#define LINUX_POLLRDNORM 0x0040
#define LINUX_POLLRDBAND 0x0080
#define LINUX_POLLWRNORM 0x0100
#define LINUX_POLLWRBAND 0x0200
#define LINUX_POLLMSG 0x0400
/* One poll descriptor entry. */
struct l_pollfd {
l_int fd; /* file descriptor */
l_short events; /* user supplied events */
l_short revents; /* kernel supplied events */
};
#define LINUX_ARCH_SET_GS 0x1001
#define LINUX_ARCH_SET_FS 0x1002
#define LINUX_ARCH_GET_FS 0x1003

View File

@ -515,27 +515,6 @@ struct l_ifconf {
#define ifc_buf ifc_ifcu.ifcu_buf
#define ifc_req ifc_ifcu.ifcu_req
/*
 * poll(): Linux event bits and the pollfd entry declared with the
 * l_* types.
 */
#define LINUX_POLLIN 0x0001
#define LINUX_POLLPRI 0x0002
#define LINUX_POLLOUT 0x0004
#define LINUX_POLLERR 0x0008
#define LINUX_POLLHUP 0x0010
#define LINUX_POLLNVAL 0x0020
#define LINUX_POLLRDNORM 0x0040
#define LINUX_POLLRDBAND 0x0080
#define LINUX_POLLWRNORM 0x0100
#define LINUX_POLLWRBAND 0x0200
#define LINUX_POLLMSG 0x0400
/* One poll descriptor entry. */
struct l_pollfd {
l_int fd; /* file descriptor */
l_short events; /* user supplied events */
l_short revents; /* kernel supplied events */
};
struct l_user_desc {
l_uint entry_number;
l_uint base_addr;

View File

@ -33,9 +33,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/ctype.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@ -50,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux.h>
#include <compat/linux/linux_common.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_util.h>
struct futex_list futex_list;
@ -627,3 +632,94 @@ linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
return (no_value);
return (bsd_ret);
}
/*
 * Translate a Linux poll 'events' mask into FreeBSD event bits.
 *
 * td:  calling thread; its process' descriptor table is consulted
 *      when LINUX_POLLRDHUP is requested.
 * fd:  descriptor the mask was supplied for.
 * lev: Linux event bits.
 * bev: receives the translated FreeBSD event bits.
 *
 * LINUX_POLLMSG and LINUX_POLLREMOVE have no translation; they are
 * only reported through a rate-limited message.
 */
void
linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
    short *bev)
{
	/* Bits that translate one-to-one. */
	static const struct {
		int	linux_bit;
		int	bsd_bit;
	} direct[] = {
		{ LINUX_POLLIN,		POLLIN },
		{ LINUX_POLLPRI,	POLLPRI },
		{ LINUX_POLLOUT,	POLLOUT },
		{ LINUX_POLLERR,	POLLERR },
		{ LINUX_POLLHUP,	POLLHUP },
		{ LINUX_POLLNVAL,	POLLNVAL },
		{ LINUX_POLLRDNORM,	POLLRDNORM },
		{ LINUX_POLLRDBAND,	POLLRDBAND },
		{ LINUX_POLLWRBAND,	POLLWRBAND },
		{ LINUX_POLLWRNORM,	POLLWRNORM },
	};
	struct proc *proc = td->td_proc;
	struct file *sockfp;
	unsigned int idx;
	short bsd_events;

	bsd_events = 0;
	for (idx = 0; idx < sizeof(direct) / sizeof(direct[0]); idx++) {
		if ((lev & direct[idx].linux_bit) != 0)
			bsd_events |= direct[idx].bsd_bit;
	}

	if ((lev & LINUX_POLLRDHUP) != 0) {
		/*
		 * Linux appears to silently ignore POLLRDHUP on
		 * non-socket file descriptors, whereas FreeBSD checks
		 * the event bits more strictly (POLLSTANDARD).  Only
		 * pass the bit on when the descriptor is a socket.
		 */
		if (fget_unlocked(proc->p_fd, fd, &cap_no_rights,
		    &sockfp) == 0) {
			/*
			 * XXX. On FreeBSD POLLRDHUP applies only to
			 * stream sockets.
			 */
			if (sockfp->f_type == DTYPE_SOCKET)
				bsd_events |= POLLRDHUP;
			fdrop(sockfp, td);
		}
	}

	if ((lev & LINUX_POLLMSG) != 0)
		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
	if ((lev & LINUX_POLLREMOVE) != 0)
		LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);

	*bev = bsd_events;
}
/*
 * Translate FreeBSD poll event bits into the Linux representation.
 *
 * bev: FreeBSD event bits.
 * lev: receives the translated Linux event bits.
 */
void
bsd_to_linux_poll_events(short bev, short *lev)
{
	static const struct {
		int	bsd_mask;
		int	linux_bit;
	} map[] = {
		{ POLLIN,		LINUX_POLLIN },
		{ POLLPRI,		LINUX_POLLPRI },
		/*
		 * POLLWRNORM is equal to POLLOUT on FreeBSD,
		 * but not on Linux, so either one is reported
		 * as LINUX_POLLOUT.
		 */
		{ POLLOUT | POLLWRNORM,	LINUX_POLLOUT },
		{ POLLERR,		LINUX_POLLERR },
		{ POLLHUP,		LINUX_POLLHUP },
		{ POLLNVAL,		LINUX_POLLNVAL },
		{ POLLRDNORM,		LINUX_POLLRDNORM },
		{ POLLRDBAND,		LINUX_POLLRDBAND },
		{ POLLWRBAND,		LINUX_POLLWRBAND },
		{ POLLRDHUP,		LINUX_POLLRDHUP },
	};
	unsigned int idx;
	short linux_events;

	linux_events = 0;
	for (idx = 0; idx < sizeof(map) / sizeof(map[0]); idx++) {
		if ((bev & map[idx].bsd_mask) != 0)
			linux_events |= map[idx].linux_bit;
	}

	*lev = linux_events;
}

View File

@ -31,6 +31,23 @@
#include <sys/queue.h>
/*
 * poll(): Linux event bits, translated to and from the FreeBSD ones by
 * linux_to_bsd_poll_events() and bsd_to_linux_poll_events().
 */
#define LINUX_POLLIN 0x0001
#define LINUX_POLLPRI 0x0002
#define LINUX_POLLOUT 0x0004
#define LINUX_POLLERR 0x0008
#define LINUX_POLLHUP 0x0010
#define LINUX_POLLNVAL 0x0020
#define LINUX_POLLRDNORM 0x0040
#define LINUX_POLLRDBAND 0x0080
#define LINUX_POLLWRNORM 0x0100
#define LINUX_POLLWRBAND 0x0200
#define LINUX_POLLMSG 0x0400 /* reported as unsupported */
#define LINUX_POLLREMOVE 0x1000 /* reported as unsupported */
#define LINUX_POLLRDHUP 0x2000 /* passed on for sockets only */
#define LINUX_IFHWADDRLEN 6
#define LINUX_IFNAMSIZ 16

View File

@ -41,5 +41,9 @@ int bsd_to_linux_sockaddr(const struct sockaddr *sa,
struct l_sockaddr **lsa, socklen_t len);
int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa,
struct sockaddr **sap, socklen_t *len);
void linux_to_bsd_poll_events(struct thread *td, int fd,
short lev, short *bev);
void bsd_to_linux_poll_events(short bev, short *lev);
#endif /* _LINUX_COMMON_H_ */

View File

@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
@ -89,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
@ -144,6 +146,10 @@ static int linux_common_pselect6(struct thread *, l_int,
static int linux_common_ppoll(struct thread *, struct pollfd *,
uint32_t, struct timespec *, l_sigset_t *,
l_size_t);
static int linux_pollin(struct thread *, struct pollfd *,
struct pollfd *, u_int);
static int linux_pollout(struct thread *, struct pollfd *,
struct pollfd *, u_int);
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
@ -2519,11 +2525,15 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
struct timespec ts0, ts1;
struct pollfd stackfds[32];
struct pollfd *kfds;
l_sigset_t l_ss;
sigset_t *ssp;
sigset_t ss;
int error;
if (kern_poll_maxfds(nfds))
return (EINVAL);
if (sset != NULL) {
if (ssize != sizeof(l_ss))
return (EINVAL);
@ -2537,7 +2547,17 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
if (tsp != NULL)
nanotime(&ts0);
error = kern_poll(td, fds, nfds, tsp, ssp);
if (nfds > nitems(stackfds))
kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
else
kfds = stackfds;
error = linux_pollin(td, kfds, fds, nfds);
if (error != 0)
goto out;
error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
if (error == 0)
error = linux_pollout(td, kfds, fds, nfds);
if (error == 0 && tsp != NULL) {
if (td->td_retval[0]) {
@ -2549,6 +2569,10 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
} else
timespecclear(tsp);
}
out:
if (nfds > nitems(stackfds))
free(kfds, M_TEMP);
return (error);
}
@ -2583,6 +2607,48 @@ linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
/*
 * Copy the user's pollfd array into the kernel buffer and convert the
 * Linux 'events' mask of every entry into FreeBSD bits.
 *
 * td:   calling thread.
 * fds:  kernel buffer with room for nfd entries.
 * ufds: user-space array to copy from.
 * nfd:  number of entries.
 *
 * Returns 0 on success, or the error reported by copyin().
 */
static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	struct pollfd *pfd;
	u_int idx;
	int error;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (idx = 0; idx < nfd; idx++) {
		pfd = &fds[idx];
		/* An empty mask needs no translation. */
		if (pfd->events == 0)
			continue;
		linux_to_bsd_poll_events(td, pfd->fd, pfd->events,
		    &pfd->events);
	}
	return (0);
}
/*
 * Convert the 'revents' of every kernel pollfd entry to Linux bits and
 * copy it out to the matching user-space entry.
 *
 * td:   calling thread; td_retval[0] is set to the number of entries
 *       whose revents was non-zero.
 * fds:  kernel array filled in by the poll.
 * ufds: user-space array receiving the revents values.
 * nfd:  number of entries.
 *
 * Returns 0 on success, or the first copyout() error; in the latter
 * case td_retval[0] is left untouched.
 */
static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	u_int idx, ready;
	int error;

	ready = 0;
	for (idx = 0; idx < nfd; idx++) {
		if (fds[idx].revents != 0) {
			bsd_to_linux_poll_events(fds[idx].revents,
			    &fds[idx].revents);
			ready++;
		}
		/* Only revents is written back to user space. */
		error = copyout(&fds[idx].revents, &ufds[idx].revents,
		    sizeof(ufds[idx].revents));
		if (error != 0)
			return (error);
	}
	td->td_retval[0] = ready;
	return (0);
}
int
linux_sched_rr_get_interval(struct thread *td,
struct linux_sched_rr_get_interval_args *uap)

View File

@ -478,27 +478,6 @@ struct l_ifreq {
#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
#define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */
/*
 * poll(): Linux event bits and the pollfd entry declared with the
 * l_* types.
 */
#define LINUX_POLLIN 0x0001
#define LINUX_POLLPRI 0x0002
#define LINUX_POLLOUT 0x0004
#define LINUX_POLLERR 0x0008
#define LINUX_POLLHUP 0x0010
#define LINUX_POLLNVAL 0x0020
#define LINUX_POLLRDNORM 0x0040
#define LINUX_POLLRDBAND 0x0080
#define LINUX_POLLWRNORM 0x0100
#define LINUX_POLLWRBAND 0x0200
#define LINUX_POLLMSG 0x0400
/* One poll descriptor entry. */
struct l_pollfd {
l_int fd; /* file descriptor */
l_short events; /* user supplied events */
l_short revents; /* kernel supplied events */
};
struct l_user_desc {
l_uint entry_number;
l_uint base_addr;