A crash reported on freebsd-fs@ on Sep. 23, 2011 under the subject

heading "kernel panics with RPCSEC_GSS" appears to be caused by a
corrupted tailq list for the client structure. Looking at the code, calls
to the function svc_rpc_gss_forget_client() were done in an SMP unsafe
manner, with the svc_rpc_gss_lock only being acquired in the function
and not before it. As such, when multiple threads called
svc_rpc_gss_forget_client() concurrently, it could try and remove the
same client structure from the tailq lists multiple times.
The patch fixes this by moving the critical code into a separate
function called svc_rpc_gss_forget_client_locked(), which must be
called with the lock held. For the one case where the caller would
have no interest in the lock, svc_rpc_gss_forget_client() was retained,
but a loop was added to check that the client structure is still in
the tailq lists before removing it, to make it safe for multiple
concurrent calls.

Tested by:	clinton.adams at gmail.com (earlier version)
Reviewed by:	zkirsch
MFC after:	3 days
This commit is contained in:
Rick Macklem 2011-10-07 01:15:04 +00:00
parent bf3a36cc7b
commit 5328a32e58

View File

@ -608,6 +608,22 @@ svc_rpc_gss_release_client(struct svc_rpc_gss_client *client)
svc_rpc_gss_destroy_client(client); svc_rpc_gss_destroy_client(client);
} }
/*
* Remove a client from our global lists.
* Must be called with svc_rpc_gss_lock held.
*/
static void
svc_rpc_gss_forget_client_locked(struct svc_rpc_gss_client *client)
{
struct svc_rpc_gss_client_list *list;
sx_assert(&svc_rpc_gss_lock, SX_XLOCKED);
list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE];
TAILQ_REMOVE(list, client, cl_link);
TAILQ_REMOVE(&svc_rpc_gss_clients, client, cl_alllink);
svc_rpc_gss_client_count--;
}
/* /*
* Remove a client from our global lists and free it if we can. * Remove a client from our global lists and free it if we can.
*/ */
@ -615,21 +631,30 @@ static void
svc_rpc_gss_forget_client(struct svc_rpc_gss_client *client) svc_rpc_gss_forget_client(struct svc_rpc_gss_client *client)
{ {
struct svc_rpc_gss_client_list *list; struct svc_rpc_gss_client_list *list;
struct svc_rpc_gss_client *tclient;
list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE]; list = &svc_rpc_gss_client_hash[client->cl_id.ci_id % CLIENT_HASH_SIZE];
sx_xlock(&svc_rpc_gss_lock); sx_xlock(&svc_rpc_gss_lock);
TAILQ_REMOVE(list, client, cl_link); TAILQ_FOREACH(tclient, list, cl_link) {
TAILQ_REMOVE(&svc_rpc_gss_clients, client, cl_alllink); /*
svc_rpc_gss_client_count--; * Make sure this client has not already been removed
* from the lists by svc_rpc_gss_forget_client() or
* svc_rpc_gss_forget_client_locked() already.
*/
if (client == tclient) {
svc_rpc_gss_forget_client_locked(client);
sx_xunlock(&svc_rpc_gss_lock);
svc_rpc_gss_release_client(client);
return;
}
}
sx_xunlock(&svc_rpc_gss_lock); sx_xunlock(&svc_rpc_gss_lock);
svc_rpc_gss_release_client(client);
} }
static void static void
svc_rpc_gss_timeout_clients(void) svc_rpc_gss_timeout_clients(void)
{ {
struct svc_rpc_gss_client *client; struct svc_rpc_gss_client *client;
struct svc_rpc_gss_client *nclient;
time_t now = time_uptime; time_t now = time_uptime;
rpc_gss_log_debug("in svc_rpc_gss_timeout_clients()"); rpc_gss_log_debug("in svc_rpc_gss_timeout_clients()");
@ -638,16 +663,29 @@ svc_rpc_gss_timeout_clients(void)
* First enforce the max client limit. We keep * First enforce the max client limit. We keep
* svc_rpc_gss_clients in LRU order. * svc_rpc_gss_clients in LRU order.
*/ */
while (svc_rpc_gss_client_count > CLIENT_MAX) sx_xlock(&svc_rpc_gss_lock);
svc_rpc_gss_forget_client(TAILQ_LAST(&svc_rpc_gss_clients, client = TAILQ_LAST(&svc_rpc_gss_clients, svc_rpc_gss_client_list);
svc_rpc_gss_client_list)); while (svc_rpc_gss_client_count > CLIENT_MAX && client != NULL) {
TAILQ_FOREACH_SAFE(client, &svc_rpc_gss_clients, cl_alllink, nclient) { svc_rpc_gss_forget_client_locked(client);
sx_xunlock(&svc_rpc_gss_lock);
svc_rpc_gss_release_client(client);
sx_xlock(&svc_rpc_gss_lock);
client = TAILQ_LAST(&svc_rpc_gss_clients,
svc_rpc_gss_client_list);
}
again:
TAILQ_FOREACH(client, &svc_rpc_gss_clients, cl_alllink) {
if (client->cl_state == CLIENT_STALE if (client->cl_state == CLIENT_STALE
|| now > client->cl_expiration) { || now > client->cl_expiration) {
svc_rpc_gss_forget_client_locked(client);
sx_xunlock(&svc_rpc_gss_lock);
rpc_gss_log_debug("expiring client %p", client); rpc_gss_log_debug("expiring client %p", client);
svc_rpc_gss_forget_client(client); svc_rpc_gss_release_client(client);
sx_xlock(&svc_rpc_gss_lock);
goto again;
} }
} }
sx_xunlock(&svc_rpc_gss_lock);
} }
#ifdef DEBUG #ifdef DEBUG