Do not synchronously start the nfsiod threads at all. The r212506

fixed the issues with file descriptor locks, but the same problems are
present for vnode lock/user map lock.

If the nfs_asyncio() cannot find the free nfsiod, schedule task to
create new nfsiod and return error. This causes fall back to the
synchronous i/o for nfs_strategy(), or does not start read at all in
the case of readahead. The caller that holds vnode and potentially
user map lock does not wait for kproc_create() to finish, preventing
the LORs.

The change effectively reverts r203072, because we never hand off the
request to newly created nfsiod thread anymore.

Reviewed by:	jhb
Tested by:	jhb, pluknet
MFC after:	3 weeks
This commit is contained in:
Konstantin Belousov 2010-10-18 19:06:46 +00:00
parent c4965cfc44
commit 223073fd1a
4 changed files with 48 additions and 80 deletions

View File

@ -253,7 +253,7 @@ int nfs_writerpc(struct vnode *, struct uio *, struct ucred *, int *,
int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct thread *td); struct ucred *cred, struct thread *td);
int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *); int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *);
int nfs_nfsiodnew(int); void nfs_nfsiodnew(void);
void nfs_nfsiodnew_tq(__unused void *, int); void nfs_nfsiodnew_tq(__unused void *, int);
int nfs_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *); int nfs_asyncio(struct nfsmount *, struct buf *, struct ucred *, struct thread *);
int nfs_doio(struct vnode *, struct buf *, struct ucred *, struct thread *); int nfs_doio(struct vnode *, struct buf *, struct ucred *, struct thread *);

View File

@ -1375,13 +1375,9 @@ again:
/* /*
* Try to create one if none are free. * Try to create one if none are free.
*/ */
if (!gotiod) { if (!gotiod)
iod = nfs_nfsiodnew(1); nfs_nfsiodnew();
if (iod != -1) else {
gotiod = TRUE;
}
if (gotiod) {
/* /*
* Found one, so wake it up and tell it which * Found one, so wake it up and tell it which
* mount to process. * mount to process.
@ -1401,7 +1397,7 @@ again:
if (!gotiod) { if (!gotiod) {
if (nmp->nm_bufqiods > 0) { if (nmp->nm_bufqiods > 0) {
NFS_DPF(ASYNCIO, NFS_DPF(ASYNCIO,
("nfs_asyncio: %d iods are already processing mount %p\n", ("nfs_asyncio: %d iods are already processing mount %p\n",
nmp->nm_bufqiods, nmp)); nmp->nm_bufqiods, nmp));
gotiod = TRUE; gotiod = TRUE;
} }
@ -1416,9 +1412,9 @@ again:
* Ensure that the queue never grows too large. We still want * Ensure that the queue never grows too large. We still want
* to asynchronize so we block rather then return EIO. * to asynchronize so we block rather then return EIO.
*/ */
while (nmp->nm_bufqlen >= 2*nfs_numasync) { while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
NFS_DPF(ASYNCIO, NFS_DPF(ASYNCIO,
("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
nmp->nm_bufqwant = TRUE; nmp->nm_bufqwant = TRUE;
error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx, error = nfs_msleep(td, &nmp->nm_bufq, &nfs_iod_mtx,
slpflag | PRIBIO, slpflag | PRIBIO,
@ -1426,7 +1422,7 @@ again:
if (error) { if (error) {
error2 = nfs_sigintr(nmp, td); error2 = nfs_sigintr(nmp, td);
if (error2) { if (error2) {
mtx_unlock(&nfs_iod_mtx); mtx_unlock(&nfs_iod_mtx);
return (error2); return (error2);
} }
if (slpflag == NFS_PCATCH) { if (slpflag == NFS_PCATCH) {
@ -1438,17 +1434,13 @@ again:
* We might have lost our iod while sleeping, * We might have lost our iod while sleeping,
* so check and loop if nescessary. * so check and loop if nescessary.
*/ */
if (nmp->nm_bufqiods == 0) { goto again;
NFS_DPF(ASYNCIO,
("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
goto again;
}
} }
/* We might have lost our nfsiod */ /* We might have lost our nfsiod */
if (nmp->nm_bufqiods == 0) { if (nmp->nm_bufqiods == 0) {
NFS_DPF(ASYNCIO, NFS_DPF(ASYNCIO,
("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
goto again; goto again;
} }

View File

@ -76,16 +76,6 @@ static MALLOC_DEFINE(M_NFSSVC, "nfsclient_srvsock", "Nfs server structure");
static void nfssvc_iod(void *); static void nfssvc_iod(void *);
struct nfsiod_str {
STAILQ_ENTRY(nfsiod_str) ni_links;
int *ni_inst;
int ni_iod;
int ni_error;
int ni_done;
};
static STAILQ_HEAD(, nfsiod_str) nfsiodhead =
STAILQ_HEAD_INITIALIZER(nfsiodhead);
static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
SYSCTL_DECL(_vfs_nfs); SYSCTL_DECL(_vfs_nfs);
@ -101,6 +91,8 @@ unsigned int nfs_iodmax = 20;
/* Minimum number of nfsiod kthreads to keep as spares */ /* Minimum number of nfsiod kthreads to keep as spares */
static unsigned int nfs_iodmin = 0; static unsigned int nfs_iodmin = 0;
static int nfs_nfsiodnew_sync(void);
static int static int
sysctl_iodmin(SYSCTL_HANDLER_ARGS) sysctl_iodmin(SYSCTL_HANDLER_ARGS)
{ {
@ -124,7 +116,7 @@ sysctl_iodmin(SYSCTL_HANDLER_ARGS)
* than the new minimum, create some more. * than the new minimum, create some more.
*/ */
for (i = nfs_iodmin - nfs_numasync; i > 0; i--) for (i = nfs_iodmin - nfs_numasync; i > 0; i--)
nfs_nfsiodnew(0); nfs_nfsiodnew_sync();
out: out:
mtx_unlock(&nfs_iod_mtx); mtx_unlock(&nfs_iod_mtx);
return (0); return (0);
@ -170,68 +162,55 @@ SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0,
sizeof (nfs_iodmax), sysctl_iodmax, "IU", sizeof (nfs_iodmax), sysctl_iodmax, "IU",
"Max number of nfsiod kthreads"); "Max number of nfsiod kthreads");
static int
nfs_nfsiodnew_sync(void)
{
int error, i;
mtx_assert(&nfs_iod_mtx, MA_OWNED);
for (i = 0; i < nfs_iodmax; i++) {
if (nfs_asyncdaemon[i] == 0) {
nfs_asyncdaemon[i] = 1;
break;
}
}
if (i == nfs_iodmax)
return (0);
mtx_unlock(&nfs_iod_mtx);
error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL,
RFHIGHPID, 0, "nfsiod %d", i);
mtx_lock(&nfs_iod_mtx);
if (error == 0) {
nfs_numasync++;
nfs_iodwant[i] = NFSIOD_AVAILABLE;
} else
nfs_asyncdaemon[i] = 0;
return (error);
}
void void
nfs_nfsiodnew_tq(__unused void *arg, int pending) nfs_nfsiodnew_tq(__unused void *arg, int pending)
{ {
struct nfsiod_str *nip;
mtx_lock(&nfs_iod_mtx); mtx_lock(&nfs_iod_mtx);
while ((nip = STAILQ_FIRST(&nfsiodhead)) != NULL) { while (pending > 0) {
STAILQ_REMOVE_HEAD(&nfsiodhead, ni_links); pending--;
mtx_unlock(&nfs_iod_mtx); nfs_nfsiodnew_sync();
nip->ni_error = kproc_create(nfssvc_iod, nip->ni_inst, NULL,
RFHIGHPID, 0, "nfsiod %d", nip->ni_iod);
nip->ni_done = 1;
mtx_lock(&nfs_iod_mtx);
wakeup(nip);
} }
mtx_unlock(&nfs_iod_mtx); mtx_unlock(&nfs_iod_mtx);
} }
int void
nfs_nfsiodnew(int set_iodwant) nfs_nfsiodnew(void)
{ {
int error, i;
int newiod;
struct nfsiod_str *nip;
if (nfs_numasync >= nfs_iodmax) mtx_assert(&nfs_iod_mtx, MA_OWNED);
return (-1);
newiod = -1;
for (i = 0; i < nfs_iodmax; i++)
if (nfs_asyncdaemon[i] == 0) {
nfs_asyncdaemon[i]++;
newiod = i;
break;
}
if (newiod == -1)
return (-1);
if (set_iodwant > 0)
nfs_iodwant[i] = NFSIOD_CREATED_FOR_NFS_ASYNCIO;
mtx_unlock(&nfs_iod_mtx);
nip = malloc(sizeof(*nip), M_TEMP, M_WAITOK | M_ZERO);
nip->ni_inst = nfs_asyncdaemon + i;
nip->ni_iod = newiod;
mtx_lock(&nfs_iod_mtx);
STAILQ_INSERT_TAIL(&nfsiodhead, nip, ni_links);
taskqueue_enqueue(taskqueue_thread, &nfs_nfsiodnew_task); taskqueue_enqueue(taskqueue_thread, &nfs_nfsiodnew_task);
while (!nip->ni_done)
mtx_sleep(nip, &nfs_iod_mtx, 0, "niwt", 0);
error = nip->ni_error;
free(nip, M_TEMP);
if (error) {
if (set_iodwant > 0)
nfs_iodwant[i] = NFSIOD_NOT_AVAILABLE;
return (-1);
}
nfs_numasync++;
return (newiod);
} }
static void static void
nfsiod_setup(void *dummy) nfsiod_setup(void *dummy)
{ {
int i;
int error; int error;
TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin);
@ -240,8 +219,8 @@ nfsiod_setup(void *dummy)
if (nfs_iodmin > NFS_MAXASYNCDAEMON) if (nfs_iodmin > NFS_MAXASYNCDAEMON)
nfs_iodmin = NFS_MAXASYNCDAEMON; nfs_iodmin = NFS_MAXASYNCDAEMON;
for (i = 0; i < nfs_iodmin; i++) { while (nfs_numasync < nfs_iodmin) {
error = nfs_nfsiodnew(0); error = nfs_nfsiodnew_sync();
if (error == -1) if (error == -1)
panic("nfsiod_setup: nfs_nfsiodnew failed"); panic("nfsiod_setup: nfs_nfsiodnew failed");
} }

View File

@ -165,16 +165,13 @@ struct nfsnode {
#define NFS_TIMESPEC_COMPARE(T1, T2) (((T1)->tv_sec != (T2)->tv_sec) || ((T1)->tv_nsec != (T2)->tv_nsec)) #define NFS_TIMESPEC_COMPARE(T1, T2) (((T1)->tv_sec != (T2)->tv_sec) || ((T1)->tv_nsec != (T2)->tv_nsec))
/* /*
* NFS iod threads can be in one of these three states once spawned. * NFS iod threads can be in one of these two states once spawned.
* NFSIOD_NOT_AVAILABLE - Cannot be assigned an I/O operation at this time. * NFSIOD_NOT_AVAILABLE - Cannot be assigned an I/O operation at this time.
* NFSIOD_AVAILABLE - Available to be assigned an I/O operation. * NFSIOD_AVAILABLE - Available to be assigned an I/O operation.
* NFSIOD_CREATED_FOR_NFS_ASYNCIO - Newly created for nfs_asyncio() and
* will be used by the thread that called nfs_asyncio().
*/ */
enum nfsiod_state { enum nfsiod_state {
NFSIOD_NOT_AVAILABLE = 0, NFSIOD_NOT_AVAILABLE = 0,
NFSIOD_AVAILABLE = 1, NFSIOD_AVAILABLE = 1,
NFSIOD_CREATED_FOR_NFS_ASYNCIO = 2,
}; };
/* /*