From 6c262608dd9129e8699bd3c3a84425b8076b83ae Mon Sep 17 00:00:00 2001 From: markj Date: Tue, 23 Oct 2018 16:35:58 +0000 Subject: [PATCH] Refactor domainset iterators for use by malloc(9) and UMA. Before this change we had two flavours of vm_domainset iterators: "page" and "malloc". The latter was only used for kmem_*() and hard-coded its behaviour based on kernel_object's policy. Moreover, its use contained a race similar to that fixed by r338755 since the kernel_object's iterator was being run without the object lock. In some cases it is useful to be able to explicitly specify a policy (domainset) or policy+iterator (domainset_ref) when performing memory allocations. To that end, refactor the vm_domainset_* KPI to permit this, and get rid of the "malloc" domainset_iter KPI in the process. Reviewed by: jeff (previous version) Tested by: pho (part of a larger patch) MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D17417 --- sys/sys/_domainset.h | 2 +- sys/vm/vm_domainset.c | 122 +++++++++++++++++++++++------------------- sys/vm/vm_domainset.h | 11 ++-- sys/vm/vm_glue.c | 2 +- sys/vm/vm_kern.c | 12 ++--- sys/vm/vm_object.c | 1 + sys/vm/vm_page.c | 8 +-- 7 files changed, 88 insertions(+), 70 deletions(-) diff --git a/sys/sys/_domainset.h b/sys/sys/_domainset.h index 34d8f61ca9fc..5685d532a9e3 100644 --- a/sys/sys/_domainset.h +++ b/sys/sys/_domainset.h @@ -54,7 +54,7 @@ typedef struct _domainset domainset_t; struct domainset; struct domainset_ref { struct domainset * volatile dr_policy; - unsigned int dr_iterator; + unsigned int dr_iter; }; #endif /* !_SYS__DOMAINSET_H_ */ diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index b9348d6c632b..2aa516e62ee0 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -62,26 +63,13 @@ static int vm_domainset_default_stride = 64; * Determine which policy is 
to be used for this allocation. */ static void -vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj, - vm_pindex_t pindex) +vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds, + int *iter, struct vm_object *obj, vm_pindex_t pindex) { - struct domainset *domain; - struct thread *td; - /* - * object policy takes precedence over thread policy. The policies - * are immutable and unsynchronized. Updates can race but pointer - * loads are assumed to be atomic. - */ - if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) { - di->di_domain = domain; - di->di_iter = &obj->domain.dr_iterator; - } else { - td = curthread; - di->di_domain = td->td_domain.dr_policy; - di->di_iter = &td->td_domain.dr_iterator; - } - di->di_policy = di->di_domain->ds_policy; + di->di_domain = ds; + di->di_iter = iter; + di->di_policy = ds->ds_policy; if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { #if VM_NRESERVLEVEL > 0 if (vm_object_reserv(obj)) { @@ -211,33 +199,39 @@ void vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, vm_pindex_t pindex, int *domain, int *req) { + struct domainset_ref *dr; - vm_domainset_iter_init(di, obj, pindex); + /* + * Object policy takes precedence over thread policy. The policies + * are immutable and unsynchronized. Updates can race but pointer + * loads are assumed to be atomic. 
+ */ + if (obj != NULL && obj->domain.dr_policy != NULL) + dr = &obj->domain; + else + dr = &curthread->td_domain; + vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); di->di_flags = *req; *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | VM_ALLOC_NOWAIT; vm_domainset_iter_first(di, domain); if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_page(di, domain, req); + vm_domainset_iter_page(di, obj, domain); } int -vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) +vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, + int *domain) { - /* - * If we exhausted all options with NOWAIT and did a WAITFAIL it - * is time to return an error to the caller. - */ - if ((*req & VM_ALLOC_WAITFAIL) != 0) - return (ENOMEM); - /* If there are more domains to visit we run the iterator. */ while (--di->di_n != 0) { vm_domainset_iter_next(di, domain); if (!di->di_minskip || !vm_page_count_min_domain(*domain)) return (0); } + + /* If we skipped domains below min restart the search. */ if (di->di_minskip) { di->di_minskip = false; vm_domainset_iter_first(di, domain); @@ -248,34 +242,53 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req) if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) return (ENOMEM); - /* - * We have visited all domains with non-blocking allocations, try - * from the beginning with a blocking allocation. - */ + /* Wait for one of the domains to accumulate some free pages. */ + if (obj != NULL) + VM_OBJECT_WUNLOCK(obj); + vm_wait_doms(&di->di_domain->ds_mask); + if (obj != NULL) + VM_OBJECT_WLOCK(obj); + if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0) + return (ENOMEM); + + /* Restart the search. 
*/ vm_domainset_iter_first(di, domain); - *req = di->di_flags; return (0); } - -void -vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, - struct vm_object *obj, int *domain, int *flags) +static void +_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, + int *flags) { - vm_domainset_iter_init(di, obj, 0); - if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) - di->di_policy = DOMAINSET_POLICY_ROUNDROBIN; di->di_flags = *flags; *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; vm_domainset_iter_first(di, domain); if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_malloc(di, domain, flags); + vm_domainset_iter_policy(di, domain); +} + +void +vm_domainset_iter_policy_init(struct vm_domainset_iter *di, + struct domainset *ds, int *domain, int *flags) +{ + + vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); + _vm_domainset_iter_policy_init(di, domain, flags); +} + +void +vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, + struct domainset_ref *dr, int *domain, int *flags) +{ + + vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); + _vm_domainset_iter_policy_init(di, domain, flags); } int -vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) +vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) { /* If there are more domains to visit we run the iterator. */ @@ -296,45 +309,46 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) if ((di->di_flags & M_WAITOK) == 0) return (ENOMEM); - /* - * We have visited all domains with non-blocking allocations, try - * from the beginning with a blocking allocation. - */ + /* Wait for one of the domains to accumulate some free pages. */ + vm_wait_doms(&di->di_domain->ds_mask); + + /* Restart the search. 
*/ vm_domainset_iter_first(di, domain); - *flags = di->di_flags; return (0); } #else /* !NUMA */ + int -vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags) +vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, + int *domain) { return (EJUSTRETURN); } void -vm_domainset_iter_page_init(struct vm_domainset_iter *di, - struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags) +vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, + vm_pindex_t pindex, int *domain, int *flags) { *domain = 0; } int -vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags) +vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) { return (EJUSTRETURN); } void -vm_domainset_iter_malloc_init(struct vm_domainset_iter *di, - struct vm_object *obj, int *domain, int *flags) +vm_domainset_iter_policy_init(struct vm_domainset_iter *di, + struct domainset *ds, int *domain, int *flags) { *domain = 0; } -#endif +#endif /* NUMA */ diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h index b1c5766c1c67..b70a027fb0cd 100644 --- a/sys/vm/vm_domainset.h +++ b/sys/vm/vm_domainset.h @@ -40,12 +40,15 @@ struct vm_domainset_iter { bool di_minskip; }; -int vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *); +int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *, + int *); void vm_domainset_iter_page_init(struct vm_domainset_iter *, struct vm_object *, vm_pindex_t, int *, int *); -int vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *); -void vm_domainset_iter_malloc_init(struct vm_domainset_iter *, - struct vm_object *, int *, int *); +int vm_domainset_iter_policy(struct vm_domainset_iter *, int *); +void vm_domainset_iter_policy_init(struct vm_domainset_iter *, + struct domainset *, int *, int *); +void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *, + struct domainset_ref *, int *, int *); void vm_wait_doms(const 
domainset_t *); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 9d69c993fa0f..fcffcd3bc34d 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -377,7 +377,7 @@ vm_thread_new(struct thread *td, int pages) */ if (vm_ndomains > 1) { ksobj->domain.dr_policy = DOMAINSET_RR(); - ksobj->domain.dr_iterator = + ksobj->domain.dr_iter = atomic_fetchadd_int(&kstack_domain_iter, 1); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 88fbc74848df..e92faff0f371 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -235,13 +235,13 @@ kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_offset_t addr; int domain; - vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); + vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags); do { addr = kmem_alloc_attr_domain(domain, size, flags, low, high, memattr); if (addr != 0) break; - } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); + } while (vm_domainset_iter_policy(&di, &domain) == 0); return (addr); } @@ -319,13 +319,13 @@ kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_offset_t addr; int domain; - vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); + vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags); do { addr = kmem_alloc_contig_domain(domain, size, flags, low, high, alignment, boundary, memattr); if (addr != 0) break; - } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); + } while (vm_domainset_iter_policy(&di, &domain) == 0); return (addr); } @@ -406,12 +406,12 @@ kmem_malloc(vm_size_t size, int flags) vm_offset_t addr; int domain; - vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); + vm_domainset_iter_policy_init(&di, DOMAINSET_RR(), &domain, &flags); do { addr = kmem_malloc_domain(domain, size, flags); if (addr != 0) break; - } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); + } while (vm_domainset_iter_policy(&di, &domain) == 0); return 
(addr); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 38d66e1a90ea..578bcd760071 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -274,6 +274,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) panic("_vm_object_allocate: type %d is undefined", type); } object->size = size; + object->domain.dr_policy = NULL; object->generation = 1; object->ref_count = 1; object->memattr = VM_MEMATTR_DEFAULT; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 47628cba2ac1..e7af2a363957 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1753,7 +1753,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, mpred); if (m != NULL) break; - } while (vm_domainset_iter_page(&di, &domain, &req) == 0); + } while (vm_domainset_iter_page(&di, object, &domain) == 0); return (m); } @@ -1990,7 +1990,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, npages, low, high, alignment, boundary, memattr); if (m != NULL) break; - } while (vm_domainset_iter_page(&di, &domain, &req) == 0); + } while (vm_domainset_iter_page(&di, object, &domain) == 0); return (m); } @@ -2191,7 +2191,7 @@ vm_page_alloc_freelist(int freelist, int req) m = vm_page_alloc_freelist_domain(domain, freelist, req); if (m != NULL) break; - } while (vm_domainset_iter_page(&di, &domain, &req) == 0); + } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); return (m); } @@ -2830,7 +2830,7 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, high, alignment, boundary); if (ret) break; - } while (vm_domainset_iter_page(&di, &domain, &req) == 0); + } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); return (ret); }