Provide yet another KPI for cdev creation, make_dev_s(9).

Immediate problem fixed by the new KPI is the long-standing race
between device creation and assignments to cdev->si_drv1 and
cdev->si_drv2, which allows the window where cdevsw methods might be
called with si_drv1,2 fields not yet set.  Devices typically checked
for NULL and returned spurious errors to usermode, and often left some
methods unchecked.

The new function interface is designed to be extensible, which should
allow to add more features to make_dev_s(9) without inventing yet
another name for function to create devices, while maintaining KPI and
even KBI backward-compatibility.

Reviewed by:	hps, jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
Differential revision:	https://reviews.freebsd.org/D4746
This commit is contained in:
Konstantin Belousov 2016-01-07 20:08:02 +00:00
parent e3ebb82118
commit 48ce5d4cac
4 changed files with 235 additions and 76 deletions

View File

@ -1013,7 +1013,8 @@ MLINKS+=make_dev.9 destroy_dev.9 \
make_dev.9 make_dev_alias_p.9 \
make_dev.9 make_dev_cred.9 \
make_dev.9 make_dev_credf.9 \
make_dev.9 make_dev_p.9
make_dev.9 make_dev_p.9 \
make_dev.9 make_dev_s.9
MLINKS+=malloc.9 free.9 \
malloc.9 MALLOC_DECLARE.9 \
malloc.9 MALLOC_DEFINE.9 \

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd Dec 22, 2012
.Dd Jan 3, 2016
.Dt MAKE_DEV 9
.Os
.Sh NAME
@ -32,6 +32,7 @@
.Nm make_dev_cred ,
.Nm make_dev_credf ,
.Nm make_dev_p ,
.Nm make_dev_s ,
.Nm make_dev_alias ,
.Nm make_dev_alias_p ,
.Nm destroy_dev ,
@ -45,16 +46,10 @@ and DEVFS registration for devices
.Sh SYNOPSIS
.In sys/param.h
.In sys/conf.h
.Ft struct cdev *
.Fn make_dev "struct cdevsw *cdevsw" "int unit" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_cred "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_credf "int flags" "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft void
.Fn make_dev_args_init "struct make_dev_args *args"
.Ft int
.Fn make_dev_p "int flags" "struct cdev **cdev" "struct cdevsw *devsw" "struct ucred *cr" "uid_t uid" "gid_t gid" "int mode" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_alias "struct cdev *pdev" "const char *fmt" ...
.Fn make_dev_s "struct make_dev_args *args" "struct cdev **cdev" "const char *fmt" ...
.Ft int
.Fn make_dev_alias_p "int flags" "struct cdev **cdev" "struct cdev *pdev" "const char *fmt" ...
.Ft void
@ -67,12 +62,26 @@ and DEVFS registration for devices
.Fn destroy_dev_drain "struct cdevsw *csw"
.Ft void
.Fn dev_depends "struct cdev *pdev" "struct cdev *cdev"
.Pp
LEGACY INTERFACES
.Ft struct cdev *
.Fn make_dev "struct cdevsw *cdevsw" "int unit" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_cred "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_credf "int flags" "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ...
.Ft int
.Fn make_dev_p "int flags" "struct cdev **cdev" "struct cdevsw *devsw" "struct ucred *cr" "uid_t uid" "gid_t gid" "int mode" "const char *fmt" ...
.Ft struct cdev *
.Fn make_dev_alias "struct cdev *pdev" "const char *fmt" ...
.Sh DESCRIPTION
The
.Fn make_dev_credf
.Fn make_dev_s
function creates a
.Fa cdev
structure for a new device.
structure for a new device, which is returned into the
.Fa cdev
argument.
It also notifies
.Xr devfs 5
of the presence of the new device, that causes corresponding nodes
@ -80,10 +89,34 @@ to be created.
Besides this, a
.Xr devctl 4
notification is sent.
The device will be owned by
.Va uid ,
The function takes the structure
.Va struct make_dev_args args ,
which specifies the parameters for the device creation:
.Pp
.Bd -literal -offset indent -compact
struct make_dev_args {
size_t mda_size;
int mda_flags;
struct cdevsw *mda_devsw;
struct ucred *mda_cr;
uid_t mda_uid;
gid_t mda_gid;
int mda_mode;
int mda_unit;
void *mda_si_drv1;
void *mda_si_drv2;
};
.Ed
Before use and filling with the desired values, the structure must be
initialized by the
.Fn make_dev_args_init
function, which ensures that future kernel interface expansion does
not affect driver source code or binary interface.
.Pp
The created device will be owned by
.Va args.mda_uid ,
with the group ownership as
.Va gid .
.Va args.mda_gid .
The name is the expansion of
.Va fmt
and following arguments as
@ -97,7 +130,7 @@ mount point and may contain slash
.Ql /
characters to denote subdirectories.
The permissions of the file specified in
.Va perms
.Va args.mda_mode
are defined in
.In sys/stat.h :
.Pp
@ -126,29 +159,28 @@ are defined in
.Ed
.Pp
The
.Va cr
.Va args.mda_cr
argument specifies credentials that will be stored in the
.Fa si_cred
member of the initialized
.Fa struct cdev .
.Pp
The
.Va flags
.Va args.mda_flags
argument alters the operation of
.Fn make_dev_credf
or
.Fn make_dev_p .
.Fn make_dev_s.
The following values are currently accepted:
.Pp
.Bl -tag -width "MAKEDEV_CHECKNAME" -compact -offset indent
.It MAKEDEV_REF
.Bl -tag -width "It Dv MAKEDEV_CHECKNAME" -compact -offset indent
.It Dv MAKEDEV_REF
reference the created device
.It MAKEDEV_NOWAIT
.It Dv MAKEDEV_NOWAIT
do not sleep, the call may fail
.It MAKEDEV_WAITOK
.It Dv MAKEDEV_WAITOK
allow the function to sleep to satisfy malloc
.It MAKEDEV_ETERNAL
.It Dv MAKEDEV_ETERNAL
created device will be never destroyed
.It MAKEDEV_CHECKNAME
.It Dv MAKEDEV_CHECKNAME
return an error if the device name is invalid or already exists
.El
.Pp
@ -189,10 +221,49 @@ For the convenience, use the
flag for the code that can be compiled into kernel or loaded
(and unloaded) as loadable module.
.Pp
A panic will occur if the MAKEDEV_CHECKNAME flag is not specified
A panic will occur if the
.Dv MAKEDEV_CHECKNAME
flag is not specified
and the device name is invalid or already exists.
.Pp
The
.Fn make_dev_p
use of the form
.Bd -literal -offset indent
struct cdev *dev;
int res;
res = make_dev_p(flags, &dev, cdevsw, cred, uid, gid, perms, name);
.Ed
is equivalent to the code
.Bd -literal -offset indent
struct cdev *dev;
struct make_dev_args args;
int res;
make_dev_args_init(&args);
args.mda_flags = flags;
args.mda_devsw = cdevsw;
args.mda_cred = cred;
args.mda_uid = uid;
args.mda_gid = gid;
args.mda_mode = perms;
res = make_dev_s(&args, &dev, name);
.Ed
.Pp
Similarly, the
.Fn make_dev_credf
function call is equivalent to
.Bd -literal -offset indent
(void) make_dev_s(&args, &dev, name);
.Ed
In other words,
.Fn make_dev_credf
does not allow the caller to obtain the return value, and in
kernels compiled with the
.Va INVARIANTS
options, the function asserts that the device creation succeeded.
.Pp
The
.Fn make_dev_cred
function is equivalent to the call
.Bd -literal -offset indent
@ -207,45 +278,55 @@ make_dev_credf(0, cdevsw, unit, NULL, uid, gid, perms, fmt, ...);
.Ed
.Pp
The
.Fn make_dev_p
function is similar to
.Fn make_dev_credf
but it may return an error number and takes a pointer to the resulting
.Ft *cdev
as an argument.
.Pp
The
.Fn make_dev_alias
.Fn make_dev_alias_p
function takes the returned
.Ft cdev
from
.Fn make_dev
and makes another (aliased) name for this device.
It is an error to call
.Fn make_dev_alias
.Fn make_dev_alias_p
prior to calling
.Fn make_dev .
.Pp
.Fn make_dev_alias_p
The
.Fn make_dev_alias
function is similar to
.Fn make_dev_alias
but it takes a pointer to the resulting
but it returns the resulting aliasing
.Ft *cdev
as an argument and may return an error.
and may not return an error.
.Pp
The
.Fa cdev
returned by
.Fn make_dev
.Fn make_dev_s
and
.Fn make_dev_alias
.Fn make_dev_alias_p
has two fields,
.Fa si_drv1
and
.Fa si_drv2 ,
that are available to store state.
Both fields are of type
.Ft void * .
.Ft void * ,
and can be initialized simultaneously with the
.Va cdev
allocation by filling
.Va args.mda_si_drv1
and
.Va args.mda_si_drv2
members of the
.Fn make_dev_s
argument structure, or filled after the
.Va cdev
is allocated, if using legacy interfaces.
In the latter case, the driver should handle the race of
accessing uninitialized
.Va si_drv1
and
.Va si_drv2
itself.
These are designed to replace the
.Fa unit
argument to
@ -331,8 +412,10 @@ unload until
is actually finished for all of them.
.Sh RETURN VALUES
If successful,
.Fn make_dev_s
and
.Fn make_dev_p
will return 0, otherwise it will return an error.
will return 0, otherwise they will return an error.
If successful,
.Fn make_dev_credf
will return a valid
@ -341,10 +424,11 @@ pointer, otherwise it will return
.Dv NULL .
.Sh ERRORS
The
.Fn make_dev_s ,
.Fn make_dev_p
and
.Fn make_dev_alias_p
call will fail and the device will be not registered if:
calls will fail and the device will be not registered if:
.Bl -tag -width Er
.It Bq Er ENOMEM
The
@ -403,3 +487,7 @@ The function
.Fn make_dev_p
first appeared in
.Fx 8.2 .
The function
.Fn make_dev_s
first appeared in
.Fx 11.0 .

View File

@ -566,22 +566,26 @@ notify_destroy(struct cdev *dev)
}
static struct cdev *
newdev(struct cdevsw *csw, int unit, struct cdev *si)
newdev(struct make_dev_args *args, struct cdev *si)
{
struct cdev *si2;
struct cdevsw *csw;
mtx_assert(&devmtx, MA_OWNED);
csw = args->mda_devsw;
if (csw->d_flags & D_NEEDMINOR) {
/* We may want to return an existing device */
LIST_FOREACH(si2, &csw->d_devs, si_list) {
if (dev2unit(si2) == unit) {
if (dev2unit(si2) == args->mda_unit) {
dev_free_devlocked(si);
return (si2);
}
}
}
si->si_drv0 = unit;
si->si_drv0 = args->mda_unit;
si->si_devsw = csw;
si->si_drv1 = args->mda_si_drv1;
si->si_drv2 = args->mda_si_drv2;
LIST_INSERT_HEAD(&csw->d_devs, si, si_list);
return (si);
}
@ -737,33 +741,46 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
return (0);
}
void
make_dev_args_init_impl(struct make_dev_args *args, size_t sz)
{
bzero(args, sz);
args->mda_size = sz;
}
static int
make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
va_list ap)
make_dev_sv(struct make_dev_args *args1, struct cdev **dres,
const char *fmt, va_list ap)
{
struct cdev *dev, *dev_new;
struct make_dev_args args;
int res;
KASSERT((flags & MAKEDEV_WAITOK) == 0 || (flags & MAKEDEV_NOWAIT) == 0,
("make_dev_credv: both WAITOK and NOWAIT specified"));
dev_new = devfs_alloc(flags);
bzero(&args, sizeof(args));
if (sizeof(args) < args1->mda_size)
return (EINVAL);
bcopy(args1, &args, args1->mda_size);
KASSERT((args.mda_flags & MAKEDEV_WAITOK) == 0 ||
(args.mda_flags & MAKEDEV_NOWAIT) == 0,
("make_dev_sv: both WAITOK and NOWAIT specified"));
dev_new = devfs_alloc(args.mda_flags);
if (dev_new == NULL)
return (ENOMEM);
dev_lock();
res = prep_cdevsw(devsw, flags);
res = prep_cdevsw(args.mda_devsw, args.mda_flags);
if (res != 0) {
dev_unlock();
devfs_free(dev_new);
return (res);
}
dev = newdev(devsw, unit, dev_new);
dev = newdev(&args, dev_new);
if ((dev->si_flags & SI_NAMED) == 0) {
res = prep_devname(dev, fmt, ap);
if (res != 0) {
if ((flags & MAKEDEV_CHECKNAME) == 0) {
if ((args.mda_flags & MAKEDEV_CHECKNAME) == 0) {
panic(
"make_dev_credv: bad si_name (error=%d, si_name=%s)",
"make_dev_sv: bad si_name (error=%d, si_name=%s)",
res, dev->si_name);
}
if (dev == dev_new) {
@ -775,9 +792,9 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
return (res);
}
}
if (flags & MAKEDEV_REF)
if ((args.mda_flags & MAKEDEV_REF) != 0)
dev_refl(dev);
if (flags & MAKEDEV_ETERNAL)
if ((args.mda_flags & MAKEDEV_ETERNAL) != 0)
dev->si_flags |= SI_ETERNAL;
if (dev->si_flags & SI_CHEAPCLONE &&
dev->si_flags & SI_NAMED) {
@ -792,24 +809,55 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
}
KASSERT(!(dev->si_flags & SI_NAMED),
("make_dev() by driver %s on pre-existing device (min=%x, name=%s)",
devsw->d_name, dev2unit(dev), devtoname(dev)));
args.mda_devsw->d_name, dev2unit(dev), devtoname(dev)));
dev->si_flags |= SI_NAMED;
if (cr != NULL)
dev->si_cred = crhold(cr);
dev->si_uid = uid;
dev->si_gid = gid;
dev->si_mode = mode;
if (args.mda_cr != NULL)
dev->si_cred = crhold(args.mda_cr);
dev->si_uid = args.mda_uid;
dev->si_gid = args.mda_gid;
dev->si_mode = args.mda_mode;
devfs_create(dev);
clean_unrhdrl(devfs_inos);
dev_unlock_and_free();
notify_create(dev, flags);
notify_create(dev, args.mda_flags);
*dres = dev;
return (0);
}
int
make_dev_s(struct make_dev_args *args, struct cdev **dres,
const char *fmt, ...)
{
va_list ap;
int res;
va_start(ap, fmt);
res = make_dev_sv(args, dres, fmt, ap);
va_end(ap);
return (res);
}
static int
make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
va_list ap)
{
struct make_dev_args args;
make_dev_args_init(&args);
args.mda_flags = flags;
args.mda_devsw = devsw;
args.mda_cr = cr;
args.mda_uid = uid;
args.mda_gid = gid;
args.mda_mode = mode;
args.mda_unit = unit;
return (make_dev_sv(&args, dres, fmt, ap));
}
struct cdev *
make_dev(struct cdevsw *devsw, int unit, uid_t uid, gid_t gid, int mode,
const char *fmt, ...)
@ -1247,6 +1295,7 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
{
struct clonedevs *cd;
struct cdev *dev, *ndev, *dl, *de;
struct make_dev_args args;
int unit, low, u;
KASSERT(*cdp != NULL,
@ -1298,7 +1347,10 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
}
if (unit == -1)
unit = low & CLONE_UNITMASK;
dev = newdev(csw, unit | extra, ndev);
make_dev_args_init(&args);
args.mda_unit = unit | extra;
args.mda_devsw = csw;
dev = newdev(&args, ndev);
if (dev->si_flags & SI_CLONELIST) {
printf("dev %p (%s) is on clonelist\n", dev, dev->si_name);
printf("unit=%d, low=%d, extra=0x%x\n", unit, low, extra);

View File

@ -226,6 +226,28 @@ void clone_cleanup(struct clonedevs **);
#define CLONE_FLAG0 (CLONE_UNITMASK + 1)
int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra);
#define MAKEDEV_REF 0x01
#define MAKEDEV_WHTOUT 0x02
#define MAKEDEV_NOWAIT 0x04
#define MAKEDEV_WAITOK 0x08
#define MAKEDEV_ETERNAL 0x10
#define MAKEDEV_CHECKNAME 0x20
struct make_dev_args {
size_t mda_size;
int mda_flags;
struct cdevsw *mda_devsw;
struct ucred *mda_cr;
uid_t mda_uid;
gid_t mda_gid;
int mda_mode;
int mda_unit;
void *mda_si_drv1;
void *mda_si_drv2;
};
void make_dev_args_init_impl(struct make_dev_args *_args, size_t _sz);
#define make_dev_args_init(a) \
make_dev_args_init_impl((a), sizeof(struct make_dev_args))
int count_dev(struct cdev *_dev);
void delist_dev(struct cdev *_dev);
void destroy_dev(struct cdev *_dev);
@ -245,12 +267,6 @@ struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid,
struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms,
const char *_fmt, ...) __printflike(7, 8);
#define MAKEDEV_REF 0x01
#define MAKEDEV_WHTOUT 0x02
#define MAKEDEV_NOWAIT 0x04
#define MAKEDEV_WAITOK 0x08
#define MAKEDEV_ETERNAL 0x10
#define MAKEDEV_CHECKNAME 0x20
struct cdev *make_dev_credf(int _flags,
struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
@ -258,6 +274,8 @@ struct cdev *make_dev_credf(int _flags,
int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
const char *_fmt, ...) __printflike(8, 9);
int make_dev_s(struct make_dev_args *_args, struct cdev **_cdev,
const char *_fmt, ...) __printflike(3, 4);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...)
__printflike(2, 3);
int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,