Use Linux semantics for the thread affinity syscalls.

Linux has more tolerant checks of the user supplied cpuset_t's.

The minimum cpuset_t size that the Linux kernel permits for
getaffinity() is the maximum CPU id present in the system divided by
NBBY; the maximum size is not limited.
For setaffinity(), Linux does not limit the size of the user-provided
cpuset_t; internally it uses only the meaningful part of the set, where
the upper bound is the maximum CPU id present in the system, and no
larger than the size of the kernel cpuset_t.
Unlike FreeBSD, Linux ignores high bits if they are set in setaffinity(),
so clear them in sched_setaffinity() and in the Linuxulator itself.

Reviewed by:		Pau Amma (man pages)
In collaboration with:	jhb
Differential revision:	https://reviews.freebsd.org/D34849
MFC after:		2 weeks
This commit is contained in:
Dmitry Chagin 2022-05-11 10:36:01 +03:00
parent 50dd2ceaea
commit f35093f8d6
8 changed files with 163 additions and 95 deletions

View File

@ -33,24 +33,15 @@
int
sched_getaffinity(pid_t pid, size_t cpusetsz, cpuset_t *cpuset)
{
/*
* Be more Linux-compatible:
* - return EINVAL in passed size is less than size of cpuset_t
* in advance, instead of ERANGE from the syscall
* - if passed size is larger than the size of cpuset_t, be
* permissive by claming it back to sizeof(cpuset_t) and
* zeroing the rest.
*/
if (cpusetsz < sizeof(cpuset_t)) {
errno = EINVAL;
return (-1);
}
if (cpusetsz > sizeof(cpuset_t)) {
memset((char *)cpuset + sizeof(cpuset_t), 0,
cpusetsz - sizeof(cpuset_t));
cpusetsz = sizeof(cpuset_t);
}
int error;
return (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
pid == 0 ? -1 : pid, cpusetsz, cpuset));
error = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
pid == 0 ? -1 : pid, cpusetsz, cpuset);
if (error == -1 && errno == ERANGE)
errno = EINVAL;
if (error == 0)
return (cpusetsz < sizeof(cpuset_t) ? cpusetsz :
sizeof(cpuset_t));
return (error);
}

View File

@ -26,6 +26,8 @@
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/sysctl.h>
#include <errno.h>
#include <sched.h>
#include <string.h>
@ -33,15 +35,28 @@
int
sched_setaffinity(pid_t pid, size_t cpusetsz, const cpuset_t *cpuset)
{
static int mp_maxid;
cpuset_t c;
int error;
int error, lbs, cpu;
size_t len, sz;
if (cpusetsz > sizeof(cpuset_t)) {
errno = EINVAL;
return (-1);
} else {
memset(&c, 0, sizeof(c));
memcpy(&c, cpuset, cpusetsz);
sz = cpusetsz > sizeof(cpuset_t) ? sizeof(cpuset_t) : cpusetsz;
memset(&c, 0, sizeof(c));
memcpy(&c, cpuset, sz);
/* Linux ignores high bits */
if (mp_maxid == 0) {
len = sizeof(mp_maxid);
error = sysctlbyname("kern.smp.maxid", &mp_maxid, &len,
NULL, 0);
if (error == -1)
return (error);
}
lbs = CPU_FLS(&c) - 1;
if (lbs > mp_maxid) {
CPU_FOREACH_ISSET(cpu, &c)
if (cpu > mp_maxid)
CPU_CLR(cpu, &c);
}
error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
pid == 0 ? -1 : pid, sizeof(cpuset_t), &c);

View File

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd May 23, 2017
.Dd April 27, 2022
.Dt CPUSET_GETAFFINITY 2
.Os
.Sh NAME
@ -71,14 +71,19 @@ Masks of type
are composed using the
.Dv CPU_SET
macros.
The kernel tolerates large sets as long as all CPUs specified
in the set exist.
Sets smaller than the kernel uses generate an error on calls to
If the user-supplied mask is not large enough to fit all of the matching CPUs,
.Fn cpuset_getaffinity
even if the result set would fit within the user supplied set.
fails with
.Er ERANGE .
Calls to
.Fn cpuset_setaffinity
tolerate small sets with no restrictions.
tolerate masks of any size with no restrictions.
The kernel uses the meaningful part of the mask, where the upper bound is
the maximum CPU id present in the system.
If bits for non-existing CPUs are set, calls to
.Fn cpuset_setaffinity
fail with
.Er EINVAL .
.Pp
The supplied mask should have a size of
.Fa setsize
@ -144,7 +149,7 @@ arguments could not be found.
.It Bq Er ERANGE
The
.Fa cpusetsize
was either preposterously large or smaller than the kernel set size.
was smaller than needed to fit all of the matching CPUs.
.It Bq Er EPERM
The calling process did not have the credentials required to complete the
operation.

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 12, 2021
.Dd April 27, 2022
.Dt PTHREAD_ATTR_AFFINITY_NP 3
.Os
.Sh NAME
@ -51,14 +51,19 @@ Masks of type
are composed using the
.Dv CPU_SET
macros.
The kernel tolerates large sets as long as all CPUs specified
in the set exist.
Sets smaller than the kernel uses generate an error on calls to
.Fn pthread_attr_getaffinity_np
even if the result set would fit within the user supplied set.
If the user-supplied mask is not large enough to fit all of the matching CPUs,
.Fn cpuset_getaffinity
fails with
.Er ERANGE .
Calls to
.Fn pthread_attr_setaffinity_np
tolerate small sets with no restrictions.
.Fn cpuset_setaffinity
tolerate masks of any size with no restrictions.
The kernel uses the meaningful part of the mask, where the upper bound is
the maximum CPU id present in the system.
If bits for non-existing CPUs are set, calls to
.Fn cpuset_setaffinity
fail with
.Er EINVAL .
.Pp
The supplied mask should have a size of
.Fa cpusetsize
@ -119,10 +124,6 @@ or the attribute specified by it is
The
.Fa cpusetp
specified a CPU that was outside the set supported by the kernel.
.It Bq Er ERANGE
The
.Fa cpusetsize
is too small.
.It Bq Er ENOMEM
Insufficient memory exists to store the cpuset mask.
.El

View File

@ -3324,7 +3324,7 @@ freebsd32_cpuset_setaffinity(struct thread *td,
struct freebsd32_cpuset_setaffinity_args *uap)
{
return (kern_cpuset_setaffinity(td, uap->level, uap->which,
return (user_cpuset_setaffinity(td, uap->level, uap->which,
PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
}

View File

@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@ -2256,22 +2257,22 @@ int
linux_sched_getaffinity(struct thread *td,
struct linux_sched_getaffinity_args *args)
{
int error;
struct thread *tdt;
if (args->len < sizeof(cpuset_t))
return (EINVAL);
int error;
id_t tid;
tdt = linux_tdfind(td, args->pid, -1);
if (tdt == NULL)
return (ESRCH);
tid = tdt->td_tid;
PROC_UNLOCK(tdt->td_proc);
error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
tid, args->len, (cpuset_t *)args->user_mask_ptr);
if (error == ERANGE)
error = EINVAL;
if (error == 0)
td->td_retval[0] = sizeof(cpuset_t);
td->td_retval[0] = min(args->len, sizeof(cpuset_t));
return (error);
}
@ -2284,18 +2285,34 @@ linux_sched_setaffinity(struct thread *td,
struct linux_sched_setaffinity_args *args)
{
struct thread *tdt;
if (args->len < sizeof(cpuset_t))
return (EINVAL);
cpuset_t *mask;
int cpu, error;
size_t len;
id_t tid;
tdt = linux_tdfind(td, args->pid, -1);
if (tdt == NULL)
return (ESRCH);
tid = tdt->td_tid;
PROC_UNLOCK(tdt->td_proc);
return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
len = min(args->len, sizeof(cpuset_t));
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);;
error = copyin(args->user_mask_ptr, mask, len);
if (error != 0)
goto out;
/* Linux ignores high bits */
CPU_FOREACH_ISSET(cpu, mask)
if (cpu > mp_maxid)
CPU_CLR(cpu, mask);
error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
tid, mask);
if (error == EDEADLK)
error = EINVAL;
out:
free(mask, M_TEMP);
return (error);
}
struct linux_rlimit64 {

View File

@ -1896,13 +1896,10 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
int error;
size_t size;
if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
return (ERANGE);
error = cpuset_check_capabilities(td, level, which, id);
if (error != 0)
return (error);
size = cpusetsize;
mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
error = cpuset_which(which, id, &p, &ttd, &set);
if (error)
goto out;
@ -1972,8 +1969,33 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
cpuset_rel(set);
if (p)
PROC_UNLOCK(p);
if (error == 0)
if (error == 0) {
if (cpusetsize < howmany(CPU_FLS(mask), NBBY)) {
error = ERANGE;
goto out;
}
size = min(cpusetsize, sizeof(cpuset_t));
error = copyout(mask, maskp, size);
if (error != 0)
goto out;
if (cpusetsize > size) {
char *end;
char *cp;
int rv;
end = cp = (char *)&maskp->__bits;
end += cpusetsize;
cp += size;
while (cp != end) {
rv = subyte(cp, 0);
if (rv == -1) {
error = EFAULT;
goto out;
}
cp++;
}
}
}
out:
free(mask, M_TEMP);
return (error);
@ -1992,50 +2014,25 @@ int
sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
{
return (kern_cpuset_setaffinity(td, uap->level, uap->which,
return (user_cpuset_setaffinity(td, uap->level, uap->which,
uap->id, uap->cpusetsize, uap->mask));
}
int
kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
id_t id, size_t cpusetsize, const cpuset_t *maskp)
id_t id, cpuset_t *mask)
{
struct cpuset *nset;
struct cpuset *set;
struct thread *ttd;
struct proc *p;
cpuset_t *mask;
int error;
if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
return (ERANGE);
error = cpuset_check_capabilities(td, level, which, id);
if (error != 0)
return (error);
mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
error = copyin(maskp, mask, cpusetsize);
if (error)
goto out;
/*
* Verify that no high bits are set.
*/
if (cpusetsize > sizeof(cpuset_t)) {
char *end;
char *cp;
end = cp = (char *)&mask->__bits;
end += cpusetsize;
cp += sizeof(cpuset_t);
while (cp != end)
if (*cp++ != 0) {
error = EINVAL;
goto out;
}
}
if (CPU_EMPTY(mask)) {
error = EDEADLK;
goto out;
}
if (CPU_EMPTY(mask))
return (EDEADLK);
switch (level) {
case CPU_LEVEL_ROOT:
case CPU_LEVEL_CPUSET:
@ -2057,8 +2054,7 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
case CPU_WHICH_INTRHANDLER:
case CPU_WHICH_ITHREAD:
case CPU_WHICH_DOMAIN:
error = EINVAL;
goto out;
return (EINVAL);
}
if (level == CPU_LEVEL_ROOT)
nset = cpuset_refroot(set);
@ -2098,6 +2094,47 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
error = EINVAL;
break;
}
return (error);
}
int
user_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
id_t id, size_t cpusetsize, const cpuset_t *maskp)
{
cpuset_t *mask;
int error;
size_t size;
size = min(cpusetsize, sizeof(cpuset_t));
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
error = copyin(maskp, mask, size);
if (error)
goto out;
/*
* Verify that no high bits are set.
*/
if (cpusetsize > sizeof(cpuset_t)) {
const char *end, *cp;
int val;
end = cp = (const char *)&maskp->__bits;
end += cpusetsize;
cp += sizeof(cpuset_t);
while (cp != end) {
val = fubyte(cp);
if (val == -1) {
error = EFAULT;
goto out;
}
if (val != 0) {
error = EINVAL;
goto out;
}
cp++;
}
}
error = kern_cpuset_setaffinity(td, level, which, id, mask);
out:
free(mask, M_TEMP);
return (error);

View File

@ -121,6 +121,8 @@ int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, cpuset_t *maskp);
int user_cpuset_setaffinity(struct thread *td, cpulevel_t level,
cpuwhich_t which, id_t id, size_t cpusetsize,
const cpuset_t *maskp);
int kern_cpuset_getdomain(struct thread *td, cpulevel_t level,