malloc(9) with M_NOWAIT seems to return NULL a lot more than I would have expected under -current.
This is a problem for GEOM because the up/down threads cannot sleep waiting for memory to become free. The reason they cannot sleep is that paging things out to disk may be the only way we can clear up some RAM. Nice catch-22 there. Implement a rudimentary ENOMEM recovery strategy: if an I/O request fails with an error code of ENOMEM, schedule it for a retry, and tell the down-thread to sleep for hz/10 to give other parts of the system a chance to free up some memory, in particular the up-path in GEOM. All caches should probably start to monitor malloc(9) failures using the new malloc_last_fail() function, and release memory when it indicates congestion. Sponsored by: DARPA & NAI Labs.
This commit is contained in:
parent
01e8832a61
commit
982f8bb0ad
@ -60,6 +60,8 @@ static struct g_bioq g_bio_run_down;
|
||||
static struct g_bioq g_bio_run_up;
|
||||
static struct g_bioq g_bio_idle;
|
||||
|
||||
static u_int pace;
|
||||
|
||||
#include <machine/atomic.h>
|
||||
|
||||
static void
|
||||
@ -314,6 +316,14 @@ g_io_deliver(struct bio *bp, int error)
|
||||
(intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
|
||||
/* finish_stats(&bp->stats); */
|
||||
|
||||
if (error == ENOMEM) {
|
||||
printf("ENOMEM %p on %p(%s)\n",
|
||||
bp, bp->bio_to, bp->bio_to->name);
|
||||
g_io_request(bp, bp->bio_from);
|
||||
pace++;
|
||||
return;
|
||||
}
|
||||
|
||||
bp->bio_error = error;
|
||||
|
||||
g_bioq_enqueue_tail(bp, &g_bio_run_up);
|
||||
@ -331,6 +341,10 @@ g_io_schedule_down(struct thread *tp __unused)
|
||||
if (bp == NULL)
|
||||
break;
|
||||
bp->bio_to->geom->start(bp);
|
||||
if (pace) {
|
||||
pace--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user