freebsd-dev/sys/compat/linux/linux_mib.c
Edward Tomasz Napierala b896bdb86d linux: Make compat.linux.preserve_vstatus default to 1
From a user point of view, this makes ^T work out of the box.

Reviewed By:	debdrup (man page)
Sponsored By:	EPSRC
Differential Revision:	https://reviews.freebsd.org/D33842
2022-01-17 08:45:01 +00:00

592 lines
15 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 1999 Marcel Moolenaar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
struct linux_prison {
char pr_osname[LINUX_MAX_UTSNAME];
char pr_osrelease[LINUX_MAX_UTSNAME];
int pr_oss_version;
int pr_osrel;
};
static struct linux_prison lprison0 = {
.pr_osname = "Linux",
.pr_osrelease = LINUX_VERSION_STR,
.pr_oss_version = 0x030600,
.pr_osrel = LINUX_VERSION_CODE
};
static unsigned linux_osd_jail_slot;
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Linux mode");
int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
&linux_debug, 0, "Log warnings from linux(4); or 0 to disable");
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
&linux_default_openfiles, 0,
"Default soft openfiles resource limit, or -1 for unlimited");
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
&linux_default_stacksize, 0,
"Default soft stack size resource limit, or -1 for unlimited");
int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
&linux_dummy_rlimits, 0,
"Return dummy values for unsupported Linux-specific rlimits");
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
&linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");
int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
&linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
&linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
"(not POSIX compliant)");
int linux_use_emul_path = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN,
&linux_use_emul_path, 0, "Use linux.compat.emul_path");
static bool linux_setid_allowed = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
&linux_setid_allowed, 0,
"Allow setuid/setgid on execve of Linux binary");
int
linux_setid_allowed_query(struct thread *td __unused,
struct image_params *imgp __unused)
{
return (linux_setid_allowed);
}
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
static int linux_set_oss_version(struct thread *td, int oss_version);
static int
linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
{
char osname[LINUX_MAX_UTSNAME];
int error;
linux_get_osname(req->td, osname);
error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
if (error != 0 || req->newptr == NULL)
return (error);
error = linux_set_osname(req->td, osname);
return (error);
}
SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
0, 0, linux_sysctl_osname, "A",
"Linux kernel OS name");
static int
linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
{
char osrelease[LINUX_MAX_UTSNAME];
int error;
linux_get_osrelease(req->td, osrelease);
error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
if (error != 0 || req->newptr == NULL)
return (error);
error = linux_set_osrelease(req->td, osrelease);
return (error);
}
SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
0, 0, linux_sysctl_osrelease, "A",
"Linux kernel OS release");
static int
linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
{
int oss_version;
int error;
oss_version = linux_get_oss_version(req->td);
error = sysctl_handle_int(oidp, &oss_version, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
error = linux_set_oss_version(req->td, oss_version);
return (error);
}
SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
0, 0, linux_sysctl_oss_version, "I",
"Linux OSS version");
/*
* Map the osrelease into integer
*/
static int
linux_map_osrel(char *osrelease, int *osrel)
{
char *sep, *eosrelease;
int len, v0, v1, v2, v;
len = strlen(osrelease);
eosrelease = osrelease + len;
v0 = strtol(osrelease, &sep, 10);
if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
return (EINVAL);
osrelease = sep + 1;
v1 = strtol(osrelease, &sep, 10);
if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
return (EINVAL);
osrelease = sep + 1;
v2 = strtol(osrelease, &sep, 10);
if (osrelease == sep ||
(sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
return (EINVAL);
v = LINUX_KERNVER(v0, v1, v2);
if (v < LINUX_KERNVER(1, 0, 0))
return (EINVAL);
if (osrel != NULL)
*osrel = v;
return (0);
}
/*
* Find a prison with Linux info.
* Return the Linux info and the (locked) prison.
*/
static struct linux_prison *
linux_find_prison(struct prison *spr, struct prison **prp)
{
struct prison *pr;
struct linux_prison *lpr;
for (pr = spr;; pr = pr->pr_parent) {
mtx_lock(&pr->pr_mtx);
lpr = (pr == &prison0)
? &lprison0
: osd_jail_get(pr, linux_osd_jail_slot);
if (lpr != NULL)
break;
mtx_unlock(&pr->pr_mtx);
}
*prp = pr;
return (lpr);
}
/*
* Ensure a prison has its own Linux info. If lprp is non-null, point it to
* the Linux info and lock the prison.
*/
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
struct prison *ppr;
struct linux_prison *lpr, *nlpr;
void **rsv;
/* If this prison already has Linux info, return that. */
lpr = linux_find_prison(pr, &ppr);
if (ppr == pr)
goto done;
/*
* Allocate a new info record. Then check again, in case something
* changed during the allocation.
*/
mtx_unlock(&ppr->pr_mtx);
nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
rsv = osd_reserve(linux_osd_jail_slot);
lpr = linux_find_prison(pr, &ppr);
if (ppr == pr) {
free(nlpr, M_PRISON);
osd_free_reserved(rsv);
goto done;
}
/* Inherit the initial values from the ancestor. */
mtx_lock(&pr->pr_mtx);
(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
bcopy(lpr, nlpr, sizeof(*lpr));
lpr = nlpr;
mtx_unlock(&ppr->pr_mtx);
done:
if (lprp != NULL)
*lprp = lpr;
else
mtx_unlock(&pr->pr_mtx);
}
/*
* Jail OSD methods for Linux prison data.
*/
static int
linux_prison_create(void *obj, void *data)
{
struct prison *pr = obj;
struct vfsoptlist *opts = data;
int jsys;
if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
jsys == JAIL_SYS_INHERIT)
return (0);
/*
* Inherit a prison's initial values from its parent
* (different from JAIL_SYS_INHERIT which also inherits changes).
*/
linux_alloc_prison(pr, NULL);
return (0);
}
static int
linux_prison_check(void *obj __unused, void *data)
{
struct vfsoptlist *opts = data;
char *osname, *osrelease;
int error, jsys, len, oss_version;
/* Check that the parameters are correct. */
error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
if (error != ENOENT) {
if (error != 0)
return (error);
if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
return (EINVAL);
}
error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
if (error != ENOENT) {
if (error != 0)
return (error);
if (len == 0 || osname[len - 1] != '\0')
return (EINVAL);
if (len > LINUX_MAX_UTSNAME) {
vfs_opterror(opts, "linux.osname too long");
return (ENAMETOOLONG);
}
}
error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
if (error != ENOENT) {
if (error != 0)
return (error);
if (len == 0 || osrelease[len - 1] != '\0')
return (EINVAL);
if (len > LINUX_MAX_UTSNAME) {
vfs_opterror(opts, "linux.osrelease too long");
return (ENAMETOOLONG);
}
error = linux_map_osrel(osrelease, NULL);
if (error != 0) {
vfs_opterror(opts, "linux.osrelease format error");
return (error);
}
}
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
if (error == ENOENT)
error = 0;
return (error);
}
static int
linux_prison_set(void *obj, void *data)
{
struct linux_prison *lpr;
struct prison *pr = obj;
struct vfsoptlist *opts = data;
char *osname, *osrelease;
int error, gotversion, jsys, len, oss_version;
/* Set the parameters, which should be correct. */
error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
if (error == ENOENT)
jsys = -1;
error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
if (error == ENOENT)
osname = NULL;
else
jsys = JAIL_SYS_NEW;
error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
if (error == ENOENT)
osrelease = NULL;
else
jsys = JAIL_SYS_NEW;
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
if (error == ENOENT)
gotversion = 0;
else {
gotversion = 1;
jsys = JAIL_SYS_NEW;
}
switch (jsys) {
case JAIL_SYS_INHERIT:
/* "linux=inherit": inherit the parent's Linux info. */
mtx_lock(&pr->pr_mtx);
osd_jail_del(pr, linux_osd_jail_slot);
mtx_unlock(&pr->pr_mtx);
break;
case JAIL_SYS_NEW:
/*
* "linux=new" or "linux.*":
* the prison gets its own Linux info.
*/
linux_alloc_prison(pr, &lpr);
if (osrelease) {
(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
strlcpy(lpr->pr_osrelease, osrelease,
LINUX_MAX_UTSNAME);
}
if (osname)
strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
if (gotversion)
lpr->pr_oss_version = oss_version;
mtx_unlock(&pr->pr_mtx);
}
return (0);
}
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
"Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
"Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
"I", "Jail Linux OSS version");
static int
linux_prison_get(void *obj, void *data)
{
struct linux_prison *lpr;
struct prison *ppr;
struct prison *pr = obj;
struct vfsoptlist *opts = data;
int error, i;
static int version0;
/* See if this prison is the one with the Linux info. */
lpr = linux_find_prison(pr, &ppr);
i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
error = vfs_setopt(opts, "linux", &i, sizeof(i));
if (error != 0 && error != ENOENT)
goto done;
if (i) {
error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopt(opts, "linux.oss_version",
&lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
if (error != 0 && error != ENOENT)
goto done;
} else {
/*
* If this prison is inheriting its Linux info, report
* empty/zero parameters.
*/
error = vfs_setopts(opts, "linux.osname", "");
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopts(opts, "linux.osrelease", "");
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopt(opts, "linux.oss_version", &version0,
sizeof(lpr->pr_oss_version));
if (error != 0 && error != ENOENT)
goto done;
}
error = 0;
done:
mtx_unlock(&ppr->pr_mtx);
return (error);
}
static void
linux_prison_destructor(void *data)
{
free(data, M_PRISON);
}
void
linux_osd_jail_register(void)
{
struct prison *pr;
osd_method_t methods[PR_MAXMETHOD] = {
[PR_METHOD_CREATE] = linux_prison_create,
[PR_METHOD_GET] = linux_prison_get,
[PR_METHOD_SET] = linux_prison_set,
[PR_METHOD_CHECK] = linux_prison_check
};
linux_osd_jail_slot =
osd_jail_register(linux_prison_destructor, methods);
/* Copy the system Linux info to any current prisons. */
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list)
linux_alloc_prison(pr, NULL);
sx_sunlock(&allprison_lock);
}
void
linux_osd_jail_deregister(void)
{
osd_jail_deregister(linux_osd_jail_slot);
}
void
linux_get_osname(struct thread *td, char *dst)
{
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
}
static int
linux_set_osname(struct thread *td, char *osname)
{
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
return (0);
}
void
linux_get_osrelease(struct thread *td, char *dst)
{
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
}
int
linux_kernver(struct thread *td)
{
struct prison *pr;
struct linux_prison *lpr;
int osrel;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
osrel = lpr->pr_osrel;
mtx_unlock(&pr->pr_mtx);
return (osrel);
}
static int
linux_set_osrelease(struct thread *td, char *osrelease)
{
struct prison *pr;
struct linux_prison *lpr;
int error;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
error = linux_map_osrel(osrelease, &lpr->pr_osrel);
if (error == 0)
strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
return (error);
}
int
linux_get_oss_version(struct thread *td)
{
struct prison *pr;
struct linux_prison *lpr;
int version;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
version = lpr->pr_oss_version;
mtx_unlock(&pr->pr_mtx);
return (version);
}
static int
linux_set_oss_version(struct thread *td, int oss_version)
{
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
lpr->pr_oss_version = oss_version;
mtx_unlock(&pr->pr_mtx);
return (0);
}