102e6817f0
Currently, subr_bus.c shares logic for (a) maintaining all HW devices (e.g. discovery/attach/detach logic) and (b) generic devctl notification layer for devices/PMU/GEOM/interfaces/etc). These two subsystems share really tiny interaction interface, composed of 3 notification functions. With that in mind, move devctl layer to a separate file, establishing a clear notification interface between the sub.c bus layer and the provider (devctl). The primary driver of this change is netlink implementation (D36002). The idea is to propagate device-level events to netlink as well, so all netlink customers can subscribe to these changes. The long-term goal is to deprecate devctl and to use netlink as the kernel<> userland transport provided netlink gets enough traction. Reviewed by: imp, markj Differential Revision: https://reviews.freebsd.org/D36091 MFC after: 1 month
573 lines
15 KiB
C
573 lines
15 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*
|
|
* Copyright (c) 2002-2020 M. Warner Losh <imp@FreeBSD.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include "opt_bus.h"
|
|
#include "opt_ddb.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/eventhandler.h>
|
|
#include <sys/filio.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/poll.h>
|
|
#include <sys/priv.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/condvar.h>
|
|
#include <sys/queue.h>
|
|
#include <machine/bus.h>
|
|
#include <sys/sbuf.h>
|
|
#include <sys/selinfo.h>
|
|
#include <sys/smp.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/uio.h>
|
|
#include <sys/bus.h>
|
|
|
|
#include <machine/cpu.h>
|
|
#include <machine/stdarg.h>
|
|
|
|
#include <vm/uma.h>
|
|
#include <vm/vm.h>
|
|
|
|
#include <ddb/ddb.h>
|
|
|
|
STAILQ_HEAD(devq, dev_event_info);
|
|
|
|
static struct dev_softc {
|
|
int inuse;
|
|
int nonblock;
|
|
int queued;
|
|
int async;
|
|
struct mtx mtx;
|
|
struct cv cv;
|
|
struct selinfo sel;
|
|
struct devq devq;
|
|
struct sigio *sigio;
|
|
uma_zone_t zone;
|
|
} devsoftc;
|
|
|
|
/*
|
|
* This design allows only one reader for /dev/devctl. This is not desirable
|
|
* in the long run, but will get a lot of hair out of this implementation.
|
|
* Maybe we should make this device a clonable device.
|
|
*
|
|
* Also note: we specifically do not attach a device to the device_t tree
|
|
* to avoid potential chicken and egg problems. One could argue that all
|
|
* of this belongs to the root node.
|
|
*/
|
|
|
|
#define DEVCTL_DEFAULT_QUEUE_LEN 1000
|
|
static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
|
|
static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
|
|
SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
|
|
CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
|
|
|
|
static void devctl_attach_handler(void *arg __unused, device_t dev);
|
|
static void devctl_detach_handler(void *arg __unused, device_t dev,
|
|
enum evhdev_detach state);
|
|
static void devctl_nomatch_handler(void *arg __unused, device_t dev);
|
|
|
|
static d_open_t devopen;
|
|
static d_close_t devclose;
|
|
static d_read_t devread;
|
|
static d_ioctl_t devioctl;
|
|
static d_poll_t devpoll;
|
|
static d_kqfilter_t devkqfilter;
|
|
|
|
#define DEVCTL_BUFFER (1024 - sizeof(void *))
|
|
struct dev_event_info {
|
|
STAILQ_ENTRY(dev_event_info) dei_link;
|
|
char dei_data[DEVCTL_BUFFER];
|
|
};
|
|
|
|
|
|
static struct cdevsw dev_cdevsw = {
|
|
.d_version = D_VERSION,
|
|
.d_open = devopen,
|
|
.d_close = devclose,
|
|
.d_read = devread,
|
|
.d_ioctl = devioctl,
|
|
.d_poll = devpoll,
|
|
.d_kqfilter = devkqfilter,
|
|
.d_name = "devctl",
|
|
};
|
|
|
|
static void filt_devctl_detach(struct knote *kn);
|
|
static int filt_devctl_read(struct knote *kn, long hint);
|
|
|
|
static struct filterops devctl_rfiltops = {
|
|
.f_isfd = 1,
|
|
.f_detach = filt_devctl_detach,
|
|
.f_event = filt_devctl_read,
|
|
};
|
|
|
|
static struct cdev *devctl_dev;
|
|
static void devaddq(const char *type, const char *what, device_t dev);
|
|
|
|
static void
|
|
devctl_init(void)
|
|
{
|
|
int reserve;
|
|
uma_zone_t z;
|
|
|
|
devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
|
|
UID_ROOT, GID_WHEEL, 0600, "devctl");
|
|
mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
|
|
cv_init(&devsoftc.cv, "dev cv");
|
|
STAILQ_INIT(&devsoftc.devq);
|
|
knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
|
|
if (devctl_queue_length > 0) {
|
|
/*
|
|
* Allocate a zone for the messages. Preallocate 2% of these for
|
|
* a reserve. Allow only devctl_queue_length slabs to cap memory
|
|
* usage. The reserve usually allows coverage of surges of
|
|
* events during memory shortages. Normally we won't have to
|
|
* re-use events from the queue, but will in extreme shortages.
|
|
*/
|
|
z = devsoftc.zone = uma_zcreate("DEVCTL",
|
|
sizeof(struct dev_event_info), NULL, NULL, NULL, NULL,
|
|
UMA_ALIGN_PTR, 0);
|
|
reserve = max(devctl_queue_length / 50, 100); /* 2% reserve */
|
|
uma_zone_set_max(z, devctl_queue_length);
|
|
uma_zone_set_maxcache(z, 0);
|
|
uma_zone_reserve(z, reserve);
|
|
uma_prealloc(z, reserve);
|
|
}
|
|
EVENTHANDLER_REGISTER(device_attach, devctl_attach_handler,
|
|
NULL, EVENTHANDLER_PRI_LAST);
|
|
EVENTHANDLER_REGISTER(device_detach, devctl_detach_handler,
|
|
NULL, EVENTHANDLER_PRI_LAST);
|
|
EVENTHANDLER_REGISTER(device_nomatch, devctl_nomatch_handler,
|
|
NULL, EVENTHANDLER_PRI_LAST);
|
|
}
|
|
SYSINIT(devctl_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, devctl_init, NULL);
|
|
|
|
/*
|
|
* A device was added to the tree. We are called just after it successfully
|
|
* attaches (that is, probe and attach success for this device). No call
|
|
* is made if a device is merely parented into the tree. See devnomatch
|
|
* if probe fails. If attach fails, no notification is sent (but maybe
|
|
* we should have a different message for this).
|
|
*/
|
|
static void
|
|
devctl_attach_handler(void *arg __unused, device_t dev)
|
|
{
|
|
devaddq("+", device_get_nameunit(dev), dev);
|
|
}
|
|
|
|
/*
|
|
* A device was removed from the tree. We are called just before this
|
|
* happens.
|
|
*/
|
|
static void
|
|
devctl_detach_handler(void *arg __unused, device_t dev, enum evhdev_detach state)
|
|
{
|
|
if (state == EVHDEV_DETACH_COMPLETE)
|
|
devaddq("-", device_get_nameunit(dev), dev);
|
|
}
|
|
|
|
/*
|
|
* Called when there's no match for this device. This is only called
|
|
* the first time that no match happens, so we don't keep getting this
|
|
* message. Should that prove to be undesirable, we can change it.
|
|
* This is called when all drivers that can attach to a given bus
|
|
* decline to accept this device. Other errors may not be detected.
|
|
*/
|
|
static void
|
|
devctl_nomatch_handler(void *arg __unused, device_t dev)
|
|
{
|
|
devaddq("?", "", dev);
|
|
}
|
|
|
|
static int
|
|
devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
|
|
{
|
|
mtx_lock(&devsoftc.mtx);
|
|
if (devsoftc.inuse) {
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (EBUSY);
|
|
}
|
|
/* move to init */
|
|
devsoftc.inuse = 1;
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
|
|
{
|
|
mtx_lock(&devsoftc.mtx);
|
|
devsoftc.inuse = 0;
|
|
devsoftc.nonblock = 0;
|
|
devsoftc.async = 0;
|
|
cv_broadcast(&devsoftc.cv);
|
|
funsetown(&devsoftc.sigio);
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* The read channel for this device is used to report changes to
|
|
* userland in realtime. We are required to free the data as well as
|
|
* the n1 object because we allocate them separately. Also note that
|
|
* we return one record at a time. If you try to read this device a
|
|
* character at a time, you will lose the rest of the data. Listening
|
|
* programs are expected to cope.
|
|
*/
|
|
static int
|
|
devread(struct cdev *dev, struct uio *uio, int ioflag)
|
|
{
|
|
struct dev_event_info *n1;
|
|
int rv;
|
|
|
|
mtx_lock(&devsoftc.mtx);
|
|
while (STAILQ_EMPTY(&devsoftc.devq)) {
|
|
if (devsoftc.nonblock) {
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (EAGAIN);
|
|
}
|
|
rv = cv_wait_sig(&devsoftc.cv, &devsoftc.mtx);
|
|
if (rv) {
|
|
/*
|
|
* Need to translate ERESTART to EINTR here? -- jake
|
|
*/
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (rv);
|
|
}
|
|
}
|
|
n1 = STAILQ_FIRST(&devsoftc.devq);
|
|
STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
|
|
devsoftc.queued--;
|
|
mtx_unlock(&devsoftc.mtx);
|
|
rv = uiomove(n1->dei_data, strlen(n1->dei_data), uio);
|
|
uma_zfree(devsoftc.zone, n1);
|
|
return (rv);
|
|
}
|
|
|
|
static int
|
|
devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
|
|
{
|
|
switch (cmd) {
|
|
case FIONBIO:
|
|
if (*(int*)data)
|
|
devsoftc.nonblock = 1;
|
|
else
|
|
devsoftc.nonblock = 0;
|
|
return (0);
|
|
case FIOASYNC:
|
|
if (*(int*)data)
|
|
devsoftc.async = 1;
|
|
else
|
|
devsoftc.async = 0;
|
|
return (0);
|
|
case FIOSETOWN:
|
|
return fsetown(*(int *)data, &devsoftc.sigio);
|
|
case FIOGETOWN:
|
|
*(int *)data = fgetown(&devsoftc.sigio);
|
|
return (0);
|
|
|
|
/* (un)Support for other fcntl() calls. */
|
|
case FIOCLEX:
|
|
case FIONCLEX:
|
|
case FIONREAD:
|
|
default:
|
|
break;
|
|
}
|
|
return (ENOTTY);
|
|
}
|
|
|
|
static int
|
|
devpoll(struct cdev *dev, int events, struct thread *td)
|
|
{
|
|
int revents = 0;
|
|
|
|
mtx_lock(&devsoftc.mtx);
|
|
if (events & (POLLIN | POLLRDNORM)) {
|
|
if (!STAILQ_EMPTY(&devsoftc.devq))
|
|
revents = events & (POLLIN | POLLRDNORM);
|
|
else
|
|
selrecord(td, &devsoftc.sel);
|
|
}
|
|
mtx_unlock(&devsoftc.mtx);
|
|
|
|
return (revents);
|
|
}
|
|
|
|
static int
|
|
devkqfilter(struct cdev *dev, struct knote *kn)
|
|
{
|
|
int error;
|
|
|
|
if (kn->kn_filter == EVFILT_READ) {
|
|
kn->kn_fop = &devctl_rfiltops;
|
|
knlist_add(&devsoftc.sel.si_note, kn, 0);
|
|
error = 0;
|
|
} else
|
|
error = EINVAL;
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
filt_devctl_detach(struct knote *kn)
|
|
{
|
|
knlist_remove(&devsoftc.sel.si_note, kn, 0);
|
|
}
|
|
|
|
static int
|
|
filt_devctl_read(struct knote *kn, long hint)
|
|
{
|
|
kn->kn_data = devsoftc.queued;
|
|
return (kn->kn_data != 0);
|
|
}
|
|
|
|
/**
|
|
* @brief Return whether the userland process is running
|
|
*/
|
|
bool
|
|
devctl_process_running(void)
|
|
{
|
|
return (devsoftc.inuse == 1);
|
|
}
|
|
|
|
static struct dev_event_info *
|
|
devctl_alloc_dei(void)
|
|
{
|
|
struct dev_event_info *dei = NULL;
|
|
|
|
mtx_lock(&devsoftc.mtx);
|
|
if (devctl_queue_length == 0)
|
|
goto out;
|
|
dei = uma_zalloc(devsoftc.zone, M_NOWAIT);
|
|
if (dei == NULL)
|
|
dei = uma_zalloc(devsoftc.zone, M_NOWAIT | M_USE_RESERVE);
|
|
if (dei == NULL) {
|
|
/*
|
|
* Guard against no items in the queue. Normally, this won't
|
|
* happen, but if lots of events happen all at once and there's
|
|
* a chance we're out of allocated space but none have yet been
|
|
* queued when we get here, leaving nothing to steal. This can
|
|
* also happen with error injection. Fail safe by returning
|
|
* NULL in that case..
|
|
*/
|
|
if (devsoftc.queued == 0)
|
|
goto out;
|
|
dei = STAILQ_FIRST(&devsoftc.devq);
|
|
STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
|
|
devsoftc.queued--;
|
|
}
|
|
MPASS(dei != NULL);
|
|
*dei->dei_data = '\0';
|
|
out:
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (dei);
|
|
}
|
|
|
|
static struct dev_event_info *
|
|
devctl_alloc_dei_sb(struct sbuf *sb)
|
|
{
|
|
struct dev_event_info *dei;
|
|
|
|
dei = devctl_alloc_dei();
|
|
if (dei != NULL)
|
|
sbuf_new(sb, dei->dei_data, sizeof(dei->dei_data), SBUF_FIXEDLEN);
|
|
return (dei);
|
|
}
|
|
|
|
static void
|
|
devctl_free_dei(struct dev_event_info *dei)
|
|
{
|
|
uma_zfree(devsoftc.zone, dei);
|
|
}
|
|
|
|
static void
|
|
devctl_queue(struct dev_event_info *dei)
|
|
{
|
|
mtx_lock(&devsoftc.mtx);
|
|
STAILQ_INSERT_TAIL(&devsoftc.devq, dei, dei_link);
|
|
devsoftc.queued++;
|
|
cv_broadcast(&devsoftc.cv);
|
|
KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
|
|
mtx_unlock(&devsoftc.mtx);
|
|
selwakeup(&devsoftc.sel);
|
|
if (devsoftc.async && devsoftc.sigio != NULL)
|
|
pgsigio(&devsoftc.sigio, SIGIO, 0);
|
|
}
|
|
|
|
/**
|
|
* @brief Send a 'notification' to userland, using standard ways
|
|
*/
|
|
void
|
|
devctl_notify(const char *system, const char *subsystem, const char *type,
|
|
const char *data)
|
|
{
|
|
struct dev_event_info *dei;
|
|
struct sbuf sb;
|
|
|
|
if (system == NULL || subsystem == NULL || type == NULL)
|
|
return;
|
|
dei = devctl_alloc_dei_sb(&sb);
|
|
if (dei == NULL)
|
|
return;
|
|
sbuf_cpy(&sb, "!system=");
|
|
sbuf_cat(&sb, system);
|
|
sbuf_cat(&sb, " subsystem=");
|
|
sbuf_cat(&sb, subsystem);
|
|
sbuf_cat(&sb, " type=");
|
|
sbuf_cat(&sb, type);
|
|
if (data != NULL) {
|
|
sbuf_putc(&sb, ' ');
|
|
sbuf_cat(&sb, data);
|
|
}
|
|
sbuf_putc(&sb, '\n');
|
|
if (sbuf_finish(&sb) != 0)
|
|
devctl_free_dei(dei); /* overflow -> drop it */
|
|
else
|
|
devctl_queue(dei);
|
|
}
|
|
|
|
/*
|
|
* Common routine that tries to make sending messages as easy as possible.
|
|
* We allocate memory for the data, copy strings into that, but do not
|
|
* free it unless there's an error. The dequeue part of the driver should
|
|
* free the data. We don't send data when the device is disabled. We do
|
|
* send data, even when we have no listeners, because we wish to avoid
|
|
* races relating to startup and restart of listening applications.
|
|
*
|
|
* devaddq is designed to string together the type of event, with the
|
|
* object of that event, plus the plug and play info and location info
|
|
* for that event. This is likely most useful for devices, but less
|
|
* useful for other consumers of this interface. Those should use
|
|
* the devctl_notify() interface instead.
|
|
*
|
|
* Output:
|
|
* ${type}${what} at $(location dev) $(pnp-info dev) on $(parent dev)
|
|
*/
|
|
static void
|
|
devaddq(const char *type, const char *what, device_t dev)
|
|
{
|
|
struct dev_event_info *dei;
|
|
const char *parstr;
|
|
struct sbuf sb;
|
|
|
|
dei = devctl_alloc_dei_sb(&sb);
|
|
if (dei == NULL)
|
|
return;
|
|
sbuf_cpy(&sb, type);
|
|
sbuf_cat(&sb, what);
|
|
sbuf_cat(&sb, " at ");
|
|
|
|
/* Add in the location */
|
|
bus_child_location(dev, &sb);
|
|
sbuf_putc(&sb, ' ');
|
|
|
|
/* Add in pnpinfo */
|
|
bus_child_pnpinfo(dev, &sb);
|
|
|
|
/* Get the parent of this device, or / if high enough in the tree. */
|
|
if (device_get_parent(dev) == NULL)
|
|
parstr = "."; /* Or '/' ? */
|
|
else
|
|
parstr = device_get_nameunit(device_get_parent(dev));
|
|
sbuf_cat(&sb, " on ");
|
|
sbuf_cat(&sb, parstr);
|
|
sbuf_putc(&sb, '\n');
|
|
if (sbuf_finish(&sb) != 0)
|
|
goto bad;
|
|
devctl_queue(dei);
|
|
return;
|
|
bad:
|
|
devctl_free_dei(dei);
|
|
}
|
|
|
|
static int
|
|
sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
int q, error;
|
|
|
|
q = devctl_queue_length;
|
|
error = sysctl_handle_int(oidp, &q, 0, req);
|
|
if (error || !req->newptr)
|
|
return (error);
|
|
if (q < 0)
|
|
return (EINVAL);
|
|
|
|
/*
|
|
* When set as a tunable, we've not yet initialized the mutex.
|
|
* It is safe to just assign to devctl_queue_length and return
|
|
* as we're racing no one. We'll use whatever value set in
|
|
* devinit.
|
|
*/
|
|
if (!mtx_initialized(&devsoftc.mtx)) {
|
|
devctl_queue_length = q;
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* XXX It's hard to grow or shrink the UMA zone. Only allow
|
|
* disabling the queue size for the moment until underlying
|
|
* UMA issues can be sorted out.
|
|
*/
|
|
if (q != 0)
|
|
return (EINVAL);
|
|
if (q == devctl_queue_length)
|
|
return (0);
|
|
mtx_lock(&devsoftc.mtx);
|
|
devctl_queue_length = 0;
|
|
uma_zdestroy(devsoftc.zone);
|
|
devsoftc.zone = 0;
|
|
mtx_unlock(&devsoftc.mtx);
|
|
return (0);
|
|
}
|
|
|
|
/**
|
|
* @brief safely quotes strings that might have double quotes in them.
|
|
*
|
|
* The devctl protocol relies on quoted strings having matching quotes.
|
|
* This routine quotes any internal quotes so the resulting string
|
|
* is safe to pass to snprintf to construct, for example pnp info strings.
|
|
*
|
|
* @param sb sbuf to place the characters into
|
|
* @param src Original buffer.
|
|
*/
|
|
void
|
|
devctl_safe_quote_sb(struct sbuf *sb, const char *src)
|
|
{
|
|
while (*src != '\0') {
|
|
if (*src == '"' || *src == '\\')
|
|
sbuf_putc(sb, '\\');
|
|
sbuf_putc(sb, *src++);
|
|
}
|
|
}
|
|
|
|
|