From 04b49c91547cf1c95616ee0ccaf3d64d024cef78 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 19 Jun 2016 18:32:35 +0000 Subject: [PATCH] Remote and local adv lock servers might de-synchronize (the added comment explains the plausible scenario), resulting in EDEADLK returned on the local registration attempt. Handle this by re-trying the local op [1]. On unmount, local registration abort is indicated as EINTR, abort the nlm call as well. Reported and tested by: pho Suggested and reviewed by: dfr (previous version, [1]) Sponsored by: The FreeBSD Foundation MFC after: 1 week Approved by: re (delphij) --- sys/nlm/nlm_advlock.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/sys/nlm/nlm_advlock.c b/sys/nlm/nlm_advlock.c index 456af87faf33..72702de0bcce 100644 --- a/sys/nlm/nlm_advlock.c +++ b/sys/nlm/nlm_advlock.c @@ -713,7 +713,37 @@ nlm_record_lock(struct vnode *vp, int op, struct flock *fl, newfl.l_pid = svid; newfl.l_sysid = NLM_SYSID_CLIENT | sysid; - error = lf_advlockasync(&a, &vp->v_lockf, size); + for (;;) { + error = lf_advlockasync(&a, &vp->v_lockf, size); + if (error == EDEADLK) { + /* + * Locks are associated with the processes and + * not with threads. Suppose we have two + * threads A1 A2 in one process, A1 locked + * file f1, A2 is locking file f2, and A1 is + * unlocking f1. Then remote server may + * already unlocked f1, while local still not + * yet scheduled A1 to make the call to local + * advlock manager. The process B owns lock on + * f2 and issued the lock on f1. Remote would + * grant B the request on f1, but local would + * return EDEADLK. + */ + pause("nlmdlk", 1); + /* XXXKIB allow suspend */ + } else if (error == EINTR) { + /* + * lf_purgelocks() might wake up the lock + * waiter and removed our lock graph edges. + * There is no sense in re-trying recording + * the lock to the local manager after + * reclaim. + */ + error = 0; + break; + } else + break; + } KASSERT(error == 0 || error == ENOENT, ("Failed to register NFS lock locally - error=%d", error)); }