From 48ce5d4cac7dc3c2160ea69dc550e218fbc0e697 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 7 Jan 2016 20:08:02 +0000 Subject: [PATCH] Provide yet another KPI for cdev creation, make_dev_s(9). Immediate problem fixed by the new KPI is the long-standing race between device creation and assignments to cdev->si_drv1 and cdev->si_drv2, which allows the window where cdevsw methods might be called with si_drv1,2 fields not yet set. Devices typically checked for NULL and returned spurious errors to usermode, and often left some methods unchecked. The new function interface is designed to be extensible, which should allow to add more features to make_dev_s(9) without inventing yet another name for function to create devices, while maintaining KPI and even KBI backward-compatibility. Reviewed by: hps, jhb Sponsored by: The FreeBSD Foundation MFC after: 3 weeks Differential revision: https://reviews.freebsd.org/D4746 --- share/man/man9/Makefile | 3 +- share/man/man9/make_dev.9 | 180 ++++++++++++++++++++++++++++---------- sys/kern/kern_conf.c | 98 ++++++++++++++++----- sys/sys/conf.h | 30 +++++-- 4 files changed, 235 insertions(+), 76 deletions(-) diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 3cd6f7f70585..def293651b41 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1013,7 +1013,8 @@ MLINKS+=make_dev.9 destroy_dev.9 \ make_dev.9 make_dev_alias_p.9 \ make_dev.9 make_dev_cred.9 \ make_dev.9 make_dev_credf.9 \ - make_dev.9 make_dev_p.9 + make_dev.9 make_dev_p.9 \ + make_dev.9 make_dev_s.9 MLINKS+=malloc.9 free.9 \ malloc.9 MALLOC_DECLARE.9 \ malloc.9 MALLOC_DEFINE.9 \ diff --git a/share/man/man9/make_dev.9 b/share/man/man9/make_dev.9 index 78345fab4a1d..0c53e8d3916b 100644 --- a/share/man/man9/make_dev.9 +++ b/share/man/man9/make_dev.9 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd Dec 22, 2012 +.Dd Jan 3, 2016 .Dt MAKE_DEV 9 .Os .Sh NAME @@ -32,6 +32,7 @@ .Nm make_dev_cred , .Nm make_dev_credf , .Nm make_dev_p , +.Nm make_dev_s , .Nm make_dev_alias , .Nm make_dev_alias_p , .Nm destroy_dev , @@ -45,16 +46,10 @@ and DEVFS registration for devices .Sh SYNOPSIS .In sys/param.h .In sys/conf.h -.Ft struct cdev * -.Fn make_dev "struct cdevsw *cdevsw" "int unit" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... -.Ft struct cdev * -.Fn make_dev_cred "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... -.Ft struct cdev * -.Fn make_dev_credf "int flags" "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... +.Ft void +.Fn make_dev_args_init "struct make_dev_args *args" .Ft int -.Fn make_dev_p "int flags" "struct cdev **cdev" "struct cdevsw *devsw" "struct ucred *cr" "uid_t uid" "gid_t gid" "int mode" "const char *fmt" ... -.Ft struct cdev * -.Fn make_dev_alias "struct cdev *pdev" "const char *fmt" ... +.Fn make_dev_s "struct make_dev_args *args" "struct cdev **cdev" "const char *fmt" ... .Ft int .Fn make_dev_alias_p "int flags" "struct cdev **cdev" "struct cdev *pdev" "const char *fmt" ... .Ft void @@ -67,12 +62,26 @@ and DEVFS registration for devices .Fn destroy_dev_drain "struct cdevsw *csw" .Ft void .Fn dev_depends "struct cdev *pdev" "struct cdev *cdev" +.Pp +LEGACY INTERFACES +.Ft struct cdev * +.Fn make_dev "struct cdevsw *cdevsw" "int unit" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... +.Ft struct cdev * +.Fn make_dev_cred "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... +.Ft struct cdev * +.Fn make_dev_credf "int flags" "struct cdevsw *cdevsw" "int unit" "struct ucred *cr" "uid_t uid" "gid_t gid" "int perms" "const char *fmt" ... +.Ft int +.Fn make_dev_p "int flags" "struct cdev **cdev" "struct cdevsw *devsw" "struct ucred *cr" "uid_t uid" "gid_t gid" "int mode" "const char *fmt" ... +.Ft struct cdev * +.Fn make_dev_alias "struct cdev *pdev" "const char *fmt" ... .Sh DESCRIPTION The -.Fn make_dev_credf +.Fn make_dev_s function creates a .Fa cdev -structure for a new device. +structure for a new device, which is returned into the +.Fa cdev +argument. It also notifies .Xr devfs 5 of the presence of the new device, that causes corresponding nodes @@ -80,10 +89,34 @@ to be created. Besides this, a .Xr devctl 4 notification is sent. -The device will be owned by -.Va uid , +The function takes the structure +.Va struct make_dev_args args , +which specifies the parameters for the device creation: +.Pp +.Bd -literal -offset indent -compact +struct make_dev_args { + size_t mda_size; + int mda_flags; + struct cdevsw *mda_devsw; + struct ucred *mda_cr; + uid_t mda_uid; + gid_t mda_gid; + int mda_mode; + int mda_unit; + void *mda_si_drv1; + void *mda_si_drv2; +}; +.Ed +Before use and filling with the desired values, the structure must be +initialized by the +.Fn make_dev_args_init +function, which ensures that future kernel interface expansion does +not affect driver source code or binary interface. +.Pp +The created device will be owned by +.Va args.mda_uid , with the group ownership as -.Va gid . +.Va args.mda_gid . The name is the expansion of .Va fmt and following arguments as @@ -97,7 +130,7 @@ mount point and may contain slash .Ql / characters to denote subdirectories. The permissions of the file specified in -.Va perms +.Va args.mda_mode are defined in .In sys/stat.h : .Pp @@ -126,29 +159,28 @@ are defined in .Ed .Pp The -.Va cr +.Va args.mda_cr argument specifies credentials that will be stored in the .Fa si_cred member of the initialized .Fa struct cdev . +.Pp The -.Va flags +.Va args.mda_flags argument alters the operation of -.Fn make_dev_credf -or -.Fn make_dev_p . +.Fn make_dev_s. The following values are currently accepted: .Pp -.Bl -tag -width "MAKEDEV_CHECKNAME" -compact -offset indent -.It MAKEDEV_REF +.Bl -tag -width "It Dv MAKEDEV_CHECKNAME" -compact -offset indent +.It Dv MAKEDEV_REF reference the created device -.It MAKEDEV_NOWAIT +.It Dv MAKEDEV_NOWAIT do not sleep, the call may fail -.It MAKEDEV_WAITOK +.It Dv MAKEDEV_WAITOK allow the function to sleep to satisfy malloc -.It MAKEDEV_ETERNAL +.It Dv MAKEDEV_ETERNAL created device will be never destroyed -.It MAKEDEV_CHECKNAME +.It Dv MAKEDEV_CHECKNAME return an error if the device name is invalid or already exists .El .Pp @@ -189,10 +221,49 @@ For the convenience, use the flag for the code that can be compiled into kernel or loaded (and unloaded) as loadable module. .Pp -A panic will occur if the MAKEDEV_CHECKNAME flag is not specified +A panic will occur if the +.Dv MAKEDEV_CHECKNAME +flag is not specified and the device name is invalid or already exists. .Pp The +.Fn make_dev_p +use of the form +.Bd -literal -offset indent +struct cdev *dev; +int res; +res = make_dev_p(flags, &dev, cdevsw, cred, uid, gid, perms, name); +.Ed +is equivalent to the code +.Bd -literal -offset indent +struct cdev *dev; +struct make_dev_args args; +int res; + +make_dev_args_init(&args); +args.mda_flags = flags; +args.mda_devsw = cdevsw; +args.mda_cred = cred; +args.mda_uid = uid; +args.mda_gid = gid; +args.mda_mode = perms; +res = make_dev_s(&args, &dev, name); +.Ed +.Pp +Similarly, the +.Fn make_dev_credf +function call is equivalent to +.Bd -literal -offset indent + (void) make_dev_s(&args, &dev, name); +.Ed +In other words, +.Fn make_dev_credf +does not allow the caller to obtain the return value, and in +kernels compiled with the +.Va INVARIANTS +options, the function asserts that the device creation succeeded. +.Pp +The .Fn make_dev_cred function is equivalent to the call .Bd -literal -offset indent @@ -207,45 +278,55 @@ make_dev_credf(0, cdevsw, unit, NULL, uid, gid, perms, fmt, ...); .Ed .Pp The -.Fn make_dev_p -function is similar to -.Fn make_dev_credf -but it may return an error number and takes a pointer to the resulting -.Ft *cdev -as an argument. -.Pp -The -.Fn make_dev_alias +.Fn make_dev_alias_p function takes the returned .Ft cdev from .Fn make_dev and makes another (aliased) name for this device. It is an error to call -.Fn make_dev_alias +.Fn make_dev_alias_p prior to calling .Fn make_dev . .Pp -.Fn make_dev_alias_p +The +.Fn make_dev_alias function is similar to .Fn make_dev_alias -but it takes a pointer to the resulting +but it returns the resulting aliasing .Ft *cdev -as an argument and may return an error. +and may not return an error. .Pp The .Fa cdev returned by -.Fn make_dev +.Fn make_dev_s and -.Fn make_dev_alias +.Fn make_dev_alias_p has two fields, .Fa si_drv1 and .Fa si_drv2 , that are available to store state. Both fields are of type -.Ft void * . +.Ft void * , +and can be initialized simultaneously with the +.Va cdev +allocation by filling +.Va args.mda_si_drv1 +and +.Va args.mda_si_drv2 +members of the +.Fn make_dev_s +argument structure, or filled after the +.Va cdev +is allocated, if using legacy interfaces. +In the latter case, the driver should handle the race of +accessing uninitialized +.Va si_drv1 +and +.Va si_drv2 +itself. These are designed to replace the .Fa unit argument to @@ -331,8 +412,10 @@ unload until is actually finished for all of them. .Sh RETURN VALUES If successful, +.Fn make_dev_s +and .Fn make_dev_p -will return 0, otherwise it will return an error. +will return 0, otherwise they will return an error. If successful, .Fn make_dev_credf will return a valid @@ -341,10 +424,11 @@ pointer, otherwise it will return .Dv NULL . .Sh ERRORS The +.Fn make_dev_s , .Fn make_dev_p and .Fn make_dev_alias_p -call will fail and the device will be not registered if: +calls will fail and the device will be not registered if: .Bl -tag -width Er .It Bq Er ENOMEM The @@ -403,3 +487,7 @@ The function .Fn make_dev_p first appeared in .Fx 8.2 . +The function +.Fn make_dev_s +first appeared in +.Fx 11.0 . diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index 20565420704d..ea68a6dea9a8 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -566,22 +566,26 @@ notify_destroy(struct cdev *dev) } static struct cdev * -newdev(struct cdevsw *csw, int unit, struct cdev *si) +newdev(struct make_dev_args *args, struct cdev *si) { struct cdev *si2; + struct cdevsw *csw; mtx_assert(&devmtx, MA_OWNED); + csw = args->mda_devsw; if (csw->d_flags & D_NEEDMINOR) { /* We may want to return an existing device */ LIST_FOREACH(si2, &csw->d_devs, si_list) { - if (dev2unit(si2) == unit) { + if (dev2unit(si2) == args->mda_unit) { dev_free_devlocked(si); return (si2); } } } - si->si_drv0 = unit; + si->si_drv0 = args->mda_unit; si->si_devsw = csw; + si->si_drv1 = args->mda_si_drv1; + si->si_drv2 = args->mda_si_drv2; LIST_INSERT_HEAD(&csw->d_devs, si, si_list); return (si); } @@ -737,33 +741,46 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap) return (0); } +void +make_dev_args_init_impl(struct make_dev_args *args, size_t sz) +{ + + bzero(args, sz); + args->mda_size = sz; +} + static int -make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit, - struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt, - va_list ap) +make_dev_sv(struct make_dev_args *args1, struct cdev **dres, + const char *fmt, va_list ap) { struct cdev *dev, *dev_new; + struct make_dev_args args; int res; - KASSERT((flags & MAKEDEV_WAITOK) == 0 || (flags & MAKEDEV_NOWAIT) == 0, - ("make_dev_credv: both WAITOK and NOWAIT specified")); - dev_new = devfs_alloc(flags); + bzero(&args, sizeof(args)); + if (sizeof(args) < args1->mda_size) + return (EINVAL); + bcopy(args1, &args, args1->mda_size); + KASSERT((args.mda_flags & MAKEDEV_WAITOK) == 0 || + (args.mda_flags & MAKEDEV_NOWAIT) == 0, + ("make_dev_sv: both WAITOK and NOWAIT specified")); + dev_new = devfs_alloc(args.mda_flags); if (dev_new == NULL) return (ENOMEM); dev_lock(); - res = prep_cdevsw(devsw, flags); + res = prep_cdevsw(args.mda_devsw, args.mda_flags); if (res != 0) { dev_unlock(); devfs_free(dev_new); return (res); } - dev = newdev(devsw, unit, dev_new); + dev = newdev(&args, dev_new); if ((dev->si_flags & SI_NAMED) == 0) { res = prep_devname(dev, fmt, ap); if (res != 0) { - if ((flags & MAKEDEV_CHECKNAME) == 0) { + if ((args.mda_flags & MAKEDEV_CHECKNAME) == 0) { panic( - "make_dev_credv: bad si_name (error=%d, si_name=%s)", + "make_dev_sv: bad si_name (error=%d, si_name=%s)", res, dev->si_name); } if (dev == dev_new) { @@ -775,9 +792,9 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit, return (res); } } - if (flags & MAKEDEV_REF) + if ((args.mda_flags & MAKEDEV_REF) != 0) dev_refl(dev); - if (flags & MAKEDEV_ETERNAL) + if ((args.mda_flags & MAKEDEV_ETERNAL) != 0) dev->si_flags |= SI_ETERNAL; if (dev->si_flags & SI_CHEAPCLONE && dev->si_flags & SI_NAMED) { @@ -792,24 +809,55 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit, } KASSERT(!(dev->si_flags & SI_NAMED), ("make_dev() by driver %s on pre-existing device (min=%x, name=%s)", - devsw->d_name, dev2unit(dev), devtoname(dev))); + args.mda_devsw->d_name, dev2unit(dev), devtoname(dev))); dev->si_flags |= SI_NAMED; - if (cr != NULL) - dev->si_cred = crhold(cr); - dev->si_uid = uid; - dev->si_gid = gid; - dev->si_mode = mode; + if (args.mda_cr != NULL) + dev->si_cred = crhold(args.mda_cr); + dev->si_uid = args.mda_uid; + dev->si_gid = args.mda_gid; + dev->si_mode = args.mda_mode; devfs_create(dev); clean_unrhdrl(devfs_inos); dev_unlock_and_free(); - notify_create(dev, flags); + notify_create(dev, args.mda_flags); *dres = dev; return (0); } +int +make_dev_s(struct make_dev_args *args, struct cdev **dres, + const char *fmt, ...) +{ + va_list ap; + int res; + + va_start(ap, fmt); + res = make_dev_sv(args, dres, fmt, ap); + va_end(ap); + return (res); +} + +static int +make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit, + struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt, + va_list ap) +{ + struct make_dev_args args; + + make_dev_args_init(&args); + args.mda_flags = flags; + args.mda_devsw = devsw; + args.mda_cr = cr; + args.mda_uid = uid; + args.mda_gid = gid; + args.mda_mode = mode; + args.mda_unit = unit; + return (make_dev_sv(&args, dres, fmt, ap)); +} + struct cdev * make_dev(struct cdevsw *devsw, int unit, uid_t uid, gid_t gid, int mode, const char *fmt, ...) @@ -1247,6 +1295,7 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, { struct clonedevs *cd; struct cdev *dev, *ndev, *dl, *de; + struct make_dev_args args; int unit, low, u; KASSERT(*cdp != NULL, @@ -1298,7 +1347,10 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, } if (unit == -1) unit = low & CLONE_UNITMASK; - dev = newdev(csw, unit | extra, ndev); + make_dev_args_init(&args); + args.mda_unit = unit | extra; + args.mda_devsw = csw; + dev = newdev(&args, ndev); if (dev->si_flags & SI_CLONELIST) { printf("dev %p (%s) is on clonelist\n", dev, dev->si_name); printf("unit=%d, low=%d, extra=0x%x\n", unit, low, extra); diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 4681acd192f6..a0dbc724fc5e 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -226,6 +226,28 @@ void clone_cleanup(struct clonedevs **); #define CLONE_FLAG0 (CLONE_UNITMASK + 1) int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra); +#define MAKEDEV_REF 0x01 +#define MAKEDEV_WHTOUT 0x02 +#define MAKEDEV_NOWAIT 0x04 +#define MAKEDEV_WAITOK 0x08 +#define MAKEDEV_ETERNAL 0x10 +#define MAKEDEV_CHECKNAME 0x20 +struct make_dev_args { + size_t mda_size; + int mda_flags; + struct cdevsw *mda_devsw; + struct ucred *mda_cr; + uid_t mda_uid; + gid_t mda_gid; + int mda_mode; + int mda_unit; + void *mda_si_drv1; + void *mda_si_drv2; +}; +void make_dev_args_init_impl(struct make_dev_args *_args, size_t _sz); +#define make_dev_args_init(a) \ + make_dev_args_init_impl((a), sizeof(struct make_dev_args)) + int count_dev(struct cdev *_dev); void delist_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev); @@ -245,12 +267,6 @@ struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid, struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(7, 8); -#define MAKEDEV_REF 0x01 -#define MAKEDEV_WHTOUT 0x02 -#define MAKEDEV_NOWAIT 0x04 -#define MAKEDEV_WAITOK 0x08 -#define MAKEDEV_ETERNAL 0x10 -#define MAKEDEV_CHECKNAME 0x20 struct cdev *make_dev_credf(int _flags, struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, @@ -258,6 +274,8 @@ struct cdev *make_dev_credf(int _flags, int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, const char *_fmt, ...) __printflike(8, 9); +int make_dev_s(struct make_dev_args *_args, struct cdev **_cdev, + const char *_fmt, ...) __printflike(3, 4); struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3); int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,