Improve robustness of the Xen balloon driver.

sys/dev/xen/balloon/balloon.c:
	Remove unused and commented out code.

	Fix deadlock caused by performing a sleepable malloc
	while holding the balloon mutex.

	Perform proper accounting of the memory used by the domain.

Submitted by:	Roger Pau Monné
Sponsored by:	Citrix Systems R&D
Reviewed by:	gibbs
MFC after:	2 days
gibbs 2013-11-15 16:35:28 +00:00
parent fb991f3d0e
commit b42abe92d4
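
The deadlock fix below reduces to a single rule: never perform a sleepable
allocation while holding balloon_mutex. A minimal sketch of the
before/after pattern, simplified from the balloon_append() change in the
diff:

	/*
	 * Before: M_WAITOK may put the thread to sleep waiting for free
	 * pages while it holds the mutex that the page-freeing path
	 * (the balloon itself) needs in order to make progress.
	 */
	mtx_lock(&balloon_mutex);
	entry = malloc(sizeof(*entry), M_BALLOON, M_WAITOK);

	/*
	 * After: M_NOWAIT returns NULL instead of sleeping; the error is
	 * propagated so the caller can back out and retry later.
	 */
	mtx_lock(&balloon_mutex);
	entry = malloc(sizeof(*entry), M_BALLOON, M_NOWAIT);
	if (entry == NULL)
		return (ENOMEM);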


@@ -52,18 +52,13 @@ __FBSDID("$FreeBSD$");
 
 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
 
+struct mtx balloon_mutex;
+
+/* Convert from KB (as fetched from xenstore) to number of PAGES */
+#define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10)
+
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-struct mtx balloon_lock;
-struct mtx balloon_mutex;
-
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
-
 struct balloon_stats {
 	/* We aim for 'current allocation' == 'target allocation'. */
@@ -116,15 +111,21 @@ static void balloon_process(void *unused);
 	printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
 /* balloon_append: add the given page to the balloon. */
-static void
+static int
 balloon_append(vm_page_t page)
 {
 	struct balloon_entry *entry;
 
-	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
+	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT);
+	if (!entry)
+		return (ENOMEM);
+
 	entry->page = page;
 	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
 	bs.balloon_low++;
+
+	return (0);
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -134,8 +135,10 @@ balloon_retrieve(void)
 	vm_page_t page;
 	struct balloon_entry *entry;
 
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
 	if (STAILQ_EMPTY(&ballooned_pages))
-		return NULL;
+		return (NULL);
 
 	entry = STAILQ_FIRST(&ballooned_pages);
 	STAILQ_REMOVE_HEAD(&ballooned_pages, list);
@@ -145,7 +148,7 @@ balloon_retrieve(void)
 	bs.balloon_low--;
 
-	return page;
+	return (page);
 }
 
 static unsigned long
@@ -154,21 +157,22 @@ current_target(void)
 	unsigned long target = min(bs.target_pages, bs.hard_limit);
 
 	if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
 		target = bs.current_pages + bs.balloon_low + bs.balloon_high;
 
-	return target;
+	return (target);
 }
 
 static unsigned long
 minimum_target(void)
 {
 #ifdef XENHVM
-#define max_pfn physmem
+#define max_pfn realmem
 #else
 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn
 #endif
 	unsigned long min_pages, curr_pages = current_target();
 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
-	/* Simple continuous piecewiese linear function:
+	/*
+	 * Simple continuous piecewiese linear function:
 	 *  max MiB -> min MiB	gradient
 	 *       0	   0
 	 *      16	  16
@ -189,12 +193,10 @@ minimum_target(void)
else
min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES
#undef max_pfn
/* Don't enforce growth */
return min(min_pages, curr_pages);
#ifndef CONFIG_XEN
#undef max_pfn
#endif
return (min(min_pages, curr_pages));
}
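
As a worked example of the piecewise function, take the branch visible
above (the one covering the largest domains) for a guest with 8 GiB of
RAM:

	max_pfn   = MB2PAGES(8192)
	min_pages = MB2PAGES(296) + (MB2PAGES(8192) >> 5)
	          = MB2PAGES(296) + MB2PAGES(256)
	          = MB2PAGES(552)

so such a domain will not be ballooned below roughly 552 MiB, while the
final min(min_pages, curr_pages) ensures the floor never forces a domain
to grow.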
 
 static int
@@ -210,10 +212,10 @@ increase_reservation(unsigned long nr_pages)
 		.domid = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
 
-	mtx_lock(&balloon_lock);
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
 
 	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
 	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
@@ -253,33 +255,14 @@ increase_reservation(unsigned long nr_pages)
 		set_phys_to_machine(pfn, frame_list[i]);
 
-#if 0
-#ifndef XENHVM
-		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-			    (unsigned long)__va(pfn << PAGE_SHIFT),
-			    pfn_pte_ma(frame_list[i], PAGE_KERNEL),
-			    0);
-			PASSING(ret == 0,
-			    ("HYPERVISOR_update_va_mapping failed"));
-		}
-#endif
-#endif
-
 		/* Relinquish the page back to the allocator. */
 		vm_page_unwire(page, 0);
 		vm_page_free(page);
 	}
 
 	bs.current_pages += nr_pages;
-	//totalram_pages = bs.current_pages;
 
 out:
-	mtx_unlock(&balloon_lock);
-
-	return 0;
+	return (0);
 }
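
The ARRAY_SIZE macro removed earlier is replaced here and below by
nitems(), the element-count macro FreeBSD already provides in
<sys/param.h>, which expands to the same expression:

	#define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))

so clamping nr_pages to the capacity of frame_list behaves exactly as
before; only the standard spelling changes.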
 
 static int
@@ -295,8 +278,10 @@ decrease_reservation(unsigned long nr_pages)
 		.domid = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
 
 	for (i = 0; i < nr_pages; i++) {
 		if ((page = vm_page_alloc(NULL, 0,
@@ -310,39 +295,15 @@ decrease_reservation(unsigned long nr_pages)
 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
 		frame_list[i] = PFNTOMFN(pfn);
 
-#if 0
-		if (!PageHighMem(page)) {
-			v = phys_to_virt(pfn << PAGE_SHIFT);
-			scrub_pages(v, 1);
-#ifdef CONFIG_XEN
-			ret = HYPERVISOR_update_va_mapping(
-			    (unsigned long)v, __pte_ma(0), 0);
-			BUG_ON(ret);
-#endif
-		}
-#endif
-#ifdef CONFIG_XEN_SCRUB_PAGES
-		else {
-			v = kmap(page);
-			scrub_pages(v, 1);
-			kunmap(page);
-		}
-#endif
-	}
-
-#ifdef CONFIG_XEN
-	/* Ensure that ballooned highmem pages don't have kmaps. */
-	kmap_flush_unused();
-	flush_tlb_all();
-#endif
-
-	mtx_lock(&balloon_lock);
-
-	/* No more mappings: invalidate P2M and add to balloon. */
-	for (i = 0; i < nr_pages; i++) {
-		pfn = MFNTOPFN(frame_list[i]);
 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+		if (balloon_append(page) != 0) {
+			vm_page_unwire(page, 0);
+			vm_page_free(page);
+
+			nr_pages = i;
+			need_sleep = 1;
+			break;
+		}
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -351,9 +312,6 @@ decrease_reservation(unsigned long nr_pages)
 	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
 
 	bs.current_pages -= nr_pages;
-	//totalram_pages = bs.current_pages;
-
-	mtx_unlock(&balloon_lock);
 
 	return (need_sleep);
 }
@@ -425,11 +383,11 @@ watch_target(struct xs_watch *watch,
 		return;
 	}
 
-	/* The given memory/target value is in KiB, so it needs converting to
-	   pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
-	*/
-	set_new_target(new_target >> (PAGE_SHIFT - 10));
+	/*
+	 * The given memory/target value is in KiB, so it needs converting to
+	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+	 */
+	set_new_target(new_target >> KB_TO_PAGE_SHIFT);
 }
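
To make the conversion concrete: with 4 KiB pages (PAGE_SHIFT == 12),
KB_TO_PAGE_SHIFT is 12 - 10 = 2, so a memory/target of 524288 KiB
(512 MiB) becomes 524288 >> 2 = 131072 pages, exactly 512 MiB worth of
4 KiB pages.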
 
 static void
@@ -461,13 +419,12 @@ balloon_init(void *arg)
 	if (!is_running_on_xen())
 		return;
 
-	mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
 	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
 
 #ifndef XENHVM
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 #else
-	bs.current_pages = physmem;
+	bs.current_pages = realmem;
 #endif
 	bs.target_pages = bs.current_pages;
 	bs.balloon_low = 0;
@@ -497,76 +454,7 @@ void balloon_update_driver_allowance(long delta);
 void
 balloon_update_driver_allowance(long delta)
 {
-	mtx_lock(&balloon_lock);
+	mtx_lock(&balloon_mutex);
 	bs.driver_pages += delta;
-	mtx_unlock(&balloon_lock);
+	mtx_unlock(&balloon_mutex);
 }
-
-#if 0
-static int dealloc_pte_fn(
-	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
-	unsigned long mfn = pte_mfn(*pte);
-	int ret;
-	struct xen_memory_reservation reservation = {
-		.extent_start = &mfn,
-		.nr_extents = 1,
-		.extent_order = 0,
-		.domid = DOMID_SELF
-	};
-	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
-	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
-	return 0;
-}
-#endif
-
-#if 0
-vm_page_t
-balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-	vm_page_t pages;
-	int i, rc;
-	unsigned long *mfn_list;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = 0,
-		.domid = DOMID_SELF
-	};
-
-	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
-	if (pages == NULL)
-		return NULL;
-
-	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
-
-	for (i = 0; i < nr_pages; i++) {
-		mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
-		PFNTOMFN(i) = INVALID_P2M_ENTRY;
-		reservation.extent_start = mfn_list;
-		reservation.nr_extents = nr_pages;
-		rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-		    &reservation);
-		KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
-	}
-
-	current_pages -= nr_pages;
-	wakeup(balloon_process);
-
-	return pages;
-}
-
-void
-balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
-{
-	unsigned long i;
-
-	for (i = 0; i < nr_pages; i++)
-		balloon_append(page + i);
-
-	wakeup(balloon_process);
-}
-#endif
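
For reference, the memory/target xenstore node that drives this whole
path is normally written by the toolstack in dom0. Assuming a host using
the xl toolstack, something like:

	xl mem-set <domain> 512m

updates the node; the xs_watch registered by the driver then fires,
watch_target() converts the KiB value to pages, and balloon_process()
calls increase_reservation() or decrease_reservation() until
bs.current_pages reaches the new target.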