Use Linux semantics for the thread affinity syscalls.
Linux is more tolerant when checking user-supplied cpuset_t's. For getaffinity(), the minimum cpuset_t size that the Linux kernel permits is the maximum CPU id present in the system divided by NBBY; the maximum size is not limited. For setaffinity(), Linux does not limit the size of the user-provided cpuset_t and internally uses only the meaningful part of the set, where the upper bound is the maximum CPU id present in the system, no larger than the size of the kernel cpuset_t. Unlike FreeBSD, Linux ignores high bits if they are set in setaffinity(), so clear them in sched_setaffinity() and in the Linuxulator itself. Reviewed by: Pau Amma (man pages) In collaboration with: jhb Differential revision: https://reviews.freebsd.org/D34849 MFC after: 2 weeks
This commit is contained in:
parent
50dd2ceaea
commit
f35093f8d6
@ -33,24 +33,15 @@
|
||||
int
|
||||
sched_getaffinity(pid_t pid, size_t cpusetsz, cpuset_t *cpuset)
|
||||
{
|
||||
/*
|
||||
* Be more Linux-compatible:
|
||||
* - return EINVAL if the passed size is less than the size of cpuset_t
|
||||
* in advance, instead of ERANGE from the syscall
|
||||
* - if passed size is larger than the size of cpuset_t, be
|
||||
* permissive by clamping it back to sizeof(cpuset_t) and
|
||||
* zeroing the rest.
|
||||
*/
|
||||
if (cpusetsz < sizeof(cpuset_t)) {
|
||||
errno = EINVAL;
|
||||
return (-1);
|
||||
}
|
||||
if (cpusetsz > sizeof(cpuset_t)) {
|
||||
memset((char *)cpuset + sizeof(cpuset_t), 0,
|
||||
cpusetsz - sizeof(cpuset_t));
|
||||
cpusetsz = sizeof(cpuset_t);
|
||||
}
|
||||
int error;
|
||||
|
||||
return (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
|
||||
pid == 0 ? -1 : pid, cpusetsz, cpuset));
|
||||
error = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
|
||||
pid == 0 ? -1 : pid, cpusetsz, cpuset);
|
||||
if (error == -1 && errno == ERANGE)
|
||||
errno = EINVAL;
|
||||
if (error == 0)
|
||||
return (cpusetsz < sizeof(cpuset_t) ? cpusetsz :
|
||||
sizeof(cpuset_t));
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
@ -26,6 +26,8 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <string.h>
|
||||
@ -33,15 +35,28 @@
|
||||
int
|
||||
sched_setaffinity(pid_t pid, size_t cpusetsz, const cpuset_t *cpuset)
|
||||
{
|
||||
static int mp_maxid;
|
||||
cpuset_t c;
|
||||
int error;
|
||||
int error, lbs, cpu;
|
||||
size_t len, sz;
|
||||
|
||||
if (cpusetsz > sizeof(cpuset_t)) {
|
||||
errno = EINVAL;
|
||||
return (-1);
|
||||
} else {
|
||||
memset(&c, 0, sizeof(c));
|
||||
memcpy(&c, cpuset, cpusetsz);
|
||||
sz = cpusetsz > sizeof(cpuset_t) ? sizeof(cpuset_t) : cpusetsz;
|
||||
memset(&c, 0, sizeof(c));
|
||||
memcpy(&c, cpuset, sz);
|
||||
|
||||
/* Linux ignores high bits */
|
||||
if (mp_maxid == 0) {
|
||||
len = sizeof(mp_maxid);
|
||||
error = sysctlbyname("kern.smp.maxid", &mp_maxid, &len,
|
||||
NULL, 0);
|
||||
if (error == -1)
|
||||
return (error);
|
||||
}
|
||||
lbs = CPU_FLS(&c) - 1;
|
||||
if (lbs > mp_maxid) {
|
||||
CPU_FOREACH_ISSET(cpu, &c)
|
||||
if (cpu > mp_maxid)
|
||||
CPU_CLR(cpu, &c);
|
||||
}
|
||||
error = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
|
||||
pid == 0 ? -1 : pid, sizeof(cpuset_t), &c);
|
||||
|
@ -25,7 +25,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd May 23, 2017
|
||||
.Dd April 27, 2022
|
||||
.Dt CPUSET_GETAFFINITY 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -71,14 +71,19 @@ Masks of type
|
||||
are composed using the
|
||||
.Dv CPU_SET
|
||||
macros.
|
||||
The kernel tolerates large sets as long as all CPUs specified
|
||||
in the set exist.
|
||||
Sets smaller than the kernel uses generate an error on calls to
|
||||
If the user-supplied mask is not large enough to fit all of the matching CPUs,
|
||||
.Fn cpuset_getaffinity
|
||||
even if the result set would fit within the user supplied set.
|
||||
fails with
|
||||
.Er ERANGE .
|
||||
Calls to
|
||||
.Fn cpuset_setaffinity
|
||||
tolerate small sets with no restrictions.
|
||||
tolerate masks of any size with no restrictions.
|
||||
The kernel uses the meaningful part of the mask, where the upper bound is
|
||||
the maximum CPU id present in the system.
|
||||
If bits for non-existing CPUs are set, calls to
|
||||
.Fn cpuset_setaffinity
|
||||
fail with
|
||||
.Er EINVAL .
|
||||
.Pp
|
||||
The supplied mask should have a size of
|
||||
.Fa setsize
|
||||
@ -144,7 +149,7 @@ arguments could not be found.
|
||||
.It Bq Er ERANGE
|
||||
The
|
||||
.Fa cpusetsize
|
||||
was either preposterously large or smaller than the kernel set size.
|
||||
was smaller than needed to fit all of the matching CPUs.
|
||||
.It Bq Er EPERM
|
||||
The calling process did not have the credentials required to complete the
|
||||
operation.
|
||||
|
@ -24,7 +24,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd October 12, 2021
|
||||
.Dd April 27, 2022
|
||||
.Dt PTHREAD_ATTR_AFFINITY_NP 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -51,14 +51,19 @@ Masks of type
|
||||
are composed using the
|
||||
.Dv CPU_SET
|
||||
macros.
|
||||
The kernel tolerates large sets as long as all CPUs specified
|
||||
in the set exist.
|
||||
Sets smaller than the kernel uses generate an error on calls to
|
||||
.Fn pthread_attr_getaffinity_np
|
||||
even if the result set would fit within the user supplied set.
|
||||
If the user-supplied mask is not large enough to fit all of the matching CPUs,
|
||||
.Fn cpuset_getaffinity
|
||||
fails with
|
||||
.Er ERANGE .
|
||||
Calls to
|
||||
.Fn pthread_attr_setaffinity_np
|
||||
tolerate small sets with no restrictions.
|
||||
.Fn cpuset_setaffinity
|
||||
tolerate masks of any size with no restrictions.
|
||||
The kernel uses the meaningful part of the mask, where the upper bound is
|
||||
the maximum CPU id present in the system.
|
||||
If bits for non-existing CPUs are set, calls to
|
||||
.Fn cpuset_setaffinity
|
||||
fail with
|
||||
.Er EINVAL .
|
||||
.Pp
|
||||
The supplied mask should have a size of
|
||||
.Fa cpusetsize
|
||||
@ -119,10 +124,6 @@ or the attribute specified by it is
|
||||
The
|
||||
.Fa cpusetp
|
||||
specified a CPU that was outside the set supported by the kernel.
|
||||
.It Bq Er ERANGE
|
||||
The
|
||||
.Fa cpusetsize
|
||||
is too small.
|
||||
.It Bq Er ENOMEM
|
||||
Insufficient memory exists to store the cpuset mask.
|
||||
.El
|
||||
|
@ -3324,7 +3324,7 @@ freebsd32_cpuset_setaffinity(struct thread *td,
|
||||
struct freebsd32_cpuset_setaffinity_args *uap)
|
||||
{
|
||||
|
||||
return (kern_cpuset_setaffinity(td, uap->level, uap->which,
|
||||
return (user_cpuset_setaffinity(td, uap->level, uap->which,
|
||||
PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
|
||||
}
|
||||
|
||||
|
@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/sched.h>
|
||||
#include <sys/sdt.h>
|
||||
#include <sys/signalvar.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscallsubr.h>
|
||||
#include <sys/sysctl.h>
|
||||
@ -2256,22 +2257,22 @@ int
|
||||
linux_sched_getaffinity(struct thread *td,
|
||||
struct linux_sched_getaffinity_args *args)
|
||||
{
|
||||
int error;
|
||||
struct thread *tdt;
|
||||
|
||||
if (args->len < sizeof(cpuset_t))
|
||||
return (EINVAL);
|
||||
int error;
|
||||
id_t tid;
|
||||
|
||||
tdt = linux_tdfind(td, args->pid, -1);
|
||||
if (tdt == NULL)
|
||||
return (ESRCH);
|
||||
|
||||
tid = tdt->td_tid;
|
||||
PROC_UNLOCK(tdt->td_proc);
|
||||
|
||||
error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
|
||||
tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
|
||||
tid, args->len, (cpuset_t *)args->user_mask_ptr);
|
||||
if (error == ERANGE)
|
||||
error = EINVAL;
|
||||
if (error == 0)
|
||||
td->td_retval[0] = sizeof(cpuset_t);
|
||||
td->td_retval[0] = min(args->len, sizeof(cpuset_t));
|
||||
|
||||
return (error);
|
||||
}
|
||||
@ -2284,18 +2285,34 @@ linux_sched_setaffinity(struct thread *td,
|
||||
struct linux_sched_setaffinity_args *args)
|
||||
{
|
||||
struct thread *tdt;
|
||||
|
||||
if (args->len < sizeof(cpuset_t))
|
||||
return (EINVAL);
|
||||
cpuset_t *mask;
|
||||
int cpu, error;
|
||||
size_t len;
|
||||
id_t tid;
|
||||
|
||||
tdt = linux_tdfind(td, args->pid, -1);
|
||||
if (tdt == NULL)
|
||||
return (ESRCH);
|
||||
|
||||
tid = tdt->td_tid;
|
||||
PROC_UNLOCK(tdt->td_proc);
|
||||
|
||||
return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
|
||||
tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
|
||||
len = min(args->len, sizeof(cpuset_t));
|
||||
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);;
|
||||
error = copyin(args->user_mask_ptr, mask, len);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
/* Linux ignores high bits */
|
||||
CPU_FOREACH_ISSET(cpu, mask)
|
||||
if (cpu > mp_maxid)
|
||||
CPU_CLR(cpu, mask);
|
||||
|
||||
error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
|
||||
tid, mask);
|
||||
if (error == EDEADLK)
|
||||
error = EINVAL;
|
||||
out:
|
||||
free(mask, M_TEMP);
|
||||
return (error);
|
||||
}
|
||||
|
||||
struct linux_rlimit64 {
|
||||
|
@ -1896,13 +1896,10 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
int error;
|
||||
size_t size;
|
||||
|
||||
if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
|
||||
return (ERANGE);
|
||||
error = cpuset_check_capabilities(td, level, which, id);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
size = cpusetsize;
|
||||
mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
|
||||
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
|
||||
error = cpuset_which(which, id, &p, &ttd, &set);
|
||||
if (error)
|
||||
goto out;
|
||||
@ -1972,8 +1969,33 @@ kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
cpuset_rel(set);
|
||||
if (p)
|
||||
PROC_UNLOCK(p);
|
||||
if (error == 0)
|
||||
if (error == 0) {
|
||||
if (cpusetsize < howmany(CPU_FLS(mask), NBBY)) {
|
||||
error = ERANGE;
|
||||
goto out;
|
||||
}
|
||||
size = min(cpusetsize, sizeof(cpuset_t));
|
||||
error = copyout(mask, maskp, size);
|
||||
if (error != 0)
|
||||
goto out;
|
||||
if (cpusetsize > size) {
|
||||
char *end;
|
||||
char *cp;
|
||||
int rv;
|
||||
|
||||
end = cp = (char *)&maskp->__bits;
|
||||
end += cpusetsize;
|
||||
cp += size;
|
||||
while (cp != end) {
|
||||
rv = subyte(cp, 0);
|
||||
if (rv == -1) {
|
||||
error = EFAULT;
|
||||
goto out;
|
||||
}
|
||||
cp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
free(mask, M_TEMP);
|
||||
return (error);
|
||||
@ -1992,50 +2014,25 @@ int
|
||||
sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
|
||||
{
|
||||
|
||||
return (kern_cpuset_setaffinity(td, uap->level, uap->which,
|
||||
return (user_cpuset_setaffinity(td, uap->level, uap->which,
|
||||
uap->id, uap->cpusetsize, uap->mask));
|
||||
}
|
||||
|
||||
int
|
||||
kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
id_t id, size_t cpusetsize, const cpuset_t *maskp)
|
||||
id_t id, cpuset_t *mask)
|
||||
{
|
||||
struct cpuset *nset;
|
||||
struct cpuset *set;
|
||||
struct thread *ttd;
|
||||
struct proc *p;
|
||||
cpuset_t *mask;
|
||||
int error;
|
||||
|
||||
if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
|
||||
return (ERANGE);
|
||||
error = cpuset_check_capabilities(td, level, which, id);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
|
||||
error = copyin(maskp, mask, cpusetsize);
|
||||
if (error)
|
||||
goto out;
|
||||
/*
|
||||
* Verify that no high bits are set.
|
||||
*/
|
||||
if (cpusetsize > sizeof(cpuset_t)) {
|
||||
char *end;
|
||||
char *cp;
|
||||
|
||||
end = cp = (char *)&mask->__bits;
|
||||
end += cpusetsize;
|
||||
cp += sizeof(cpuset_t);
|
||||
while (cp != end)
|
||||
if (*cp++ != 0) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (CPU_EMPTY(mask)) {
|
||||
error = EDEADLK;
|
||||
goto out;
|
||||
}
|
||||
if (CPU_EMPTY(mask))
|
||||
return (EDEADLK);
|
||||
switch (level) {
|
||||
case CPU_LEVEL_ROOT:
|
||||
case CPU_LEVEL_CPUSET:
|
||||
@ -2057,8 +2054,7 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
case CPU_WHICH_INTRHANDLER:
|
||||
case CPU_WHICH_ITHREAD:
|
||||
case CPU_WHICH_DOMAIN:
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
return (EINVAL);
|
||||
}
|
||||
if (level == CPU_LEVEL_ROOT)
|
||||
nset = cpuset_refroot(set);
|
||||
@ -2098,6 +2094,47 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
user_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
id_t id, size_t cpusetsize, const cpuset_t *maskp)
|
||||
{
|
||||
cpuset_t *mask;
|
||||
int error;
|
||||
size_t size;
|
||||
|
||||
size = min(cpusetsize, sizeof(cpuset_t));
|
||||
mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
|
||||
error = copyin(maskp, mask, size);
|
||||
if (error)
|
||||
goto out;
|
||||
/*
|
||||
* Verify that no high bits are set.
|
||||
*/
|
||||
if (cpusetsize > sizeof(cpuset_t)) {
|
||||
const char *end, *cp;
|
||||
int val;
|
||||
end = cp = (const char *)&maskp->__bits;
|
||||
end += cpusetsize;
|
||||
cp += sizeof(cpuset_t);
|
||||
|
||||
while (cp != end) {
|
||||
val = fubyte(cp);
|
||||
if (val == -1) {
|
||||
error = EFAULT;
|
||||
goto out;
|
||||
}
|
||||
if (val != 0) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
cp++;
|
||||
}
|
||||
}
|
||||
error = kern_cpuset_setaffinity(td, level, which, id, mask);
|
||||
|
||||
out:
|
||||
free(mask, M_TEMP);
|
||||
return (error);
|
||||
|
@ -121,6 +121,8 @@ int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
|
||||
int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
|
||||
cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
|
||||
int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
|
||||
cpuwhich_t which, id_t id, cpuset_t *maskp);
|
||||
int user_cpuset_setaffinity(struct thread *td, cpulevel_t level,
|
||||
cpuwhich_t which, id_t id, size_t cpusetsize,
|
||||
const cpuset_t *maskp);
|
||||
int kern_cpuset_getdomain(struct thread *td, cpulevel_t level,
|
||||
|
Loading…
Reference in New Issue
Block a user