The vm map lock is needed in vm_fault() after the page has been found,

to avoid later changes before pmap_enter() and vm_fault_prefault()
have completed.

Simplify deadlock avoidance by not blocking on vm map relookup.

In collaboration with: alc
This commit is contained in:
Tor Egge 2004-08-12 20:14:49 +00:00
parent 882a77b811
commit 19dc560756
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=133598
3 changed files with 142 additions and 59 deletions

View File

@ -760,69 +760,55 @@ RetryFault:;
* We must verify that the maps have not changed since our last
* lookup.
*/
if (!fs.lookup_still_valid &&
(fs.map->timestamp != map_generation)) {
if (!fs.lookup_still_valid) {
vm_object_t retry_object;
vm_pindex_t retry_pindex;
vm_prot_t retry_prot;
/*
* Unlock vnode before the lookup to avoid deadlock. E.G.
* avoid a deadlock between the inode and exec_map that can
* occur due to locks being obtained in different orders.
*/
if (fs.vp != NULL) {
vput(fs.vp);
fs.vp = NULL;
}
if (fs.map->infork) {
if (!vm_map_trylock_read(fs.map)) {
release_page(&fs);
unlock_and_deallocate(&fs);
goto RetryFault;
}
VM_OBJECT_UNLOCK(fs.object);
/*
* To avoid trying to write_lock the map while another process
* has it read_locked (in vm_map_wire), we do not try for
* write permission. If the page is still writable, we will
* get write permission. If it is not, or has been marked
* needs_copy, we enter the mapping without write permission,
* and will merely take another fault.
*/
result = vm_map_lookup(&fs.map, vaddr, fault_type & ~VM_PROT_WRITE,
&fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);
map_generation = fs.map->timestamp;
VM_OBJECT_LOCK(fs.object);
/*
* If we don't need the page any longer, put it on the active
* list (the easiest thing to do here). If no one needs it,
* pageout will grab it eventually.
*/
if (result != KERN_SUCCESS) {
release_page(&fs);
unlock_and_deallocate(&fs);
return (result);
}
fs.lookup_still_valid = TRUE;
if (fs.map->timestamp != map_generation) {
result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
&fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);
if ((retry_object != fs.first_object) ||
(retry_pindex != fs.first_pindex)) {
release_page(&fs);
unlock_and_deallocate(&fs);
goto RetryFault;
/*
* If we don't need the page any longer, put it on the active
* list (the easiest thing to do here). If no one needs it,
* pageout will grab it eventually.
*/
if (result != KERN_SUCCESS) {
release_page(&fs);
unlock_and_deallocate(&fs);
/*
* If retry of map lookup would have blocked then
* retry fault from start.
*/
if (result == KERN_FAILURE)
goto RetryFault;
return (result);
}
if ((retry_object != fs.first_object) ||
(retry_pindex != fs.first_pindex)) {
release_page(&fs);
unlock_and_deallocate(&fs);
goto RetryFault;
}
/*
* Check whether the protection has changed or the object has
* been copied while we left the map unlocked. Changing from
* read to write permission is OK - we leave the page
* write-protected, and catch the write fault. Changing from
* write to read permission means that we can't mark the page
* write-enabled after all.
*/
prot &= retry_prot;
}
/*
* Check whether the protection has changed or the object has
* been copied while we left the map unlocked. Changing from
* read to write permission is OK - we leave the page
* write-protected, and catch the write fault. Changing from
* write to read permission means that we can't mark the page
* write-enabled after all.
*/
prot &= retry_prot;
}
if (prot & VM_PROT_WRITE) {
vm_page_lock_queues();

View File

@ -209,7 +209,6 @@ vm_map_zinit(void *mem, int size, int flags)
map = (vm_map_t)mem;
map->nentries = 0;
map->size = 0;
map->infork = 0;
mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
sx_init(&map->lock, "user map");
return (0);
@ -237,9 +236,6 @@ vm_map_zdtor(void *mem, int size, void *arg)
KASSERT(map->size == 0,
("map %p size == %lu on free.",
map, (unsigned long)map->size));
KASSERT(map->infork == 0,
("map %p infork == %d on free.",
map, map->infork));
}
#endif /* INVARIANTS */
@ -2389,7 +2385,6 @@ vmspace_fork(struct vmspace *vm1)
GIANT_REQUIRED;
vm_map_lock(old_map);
old_map->infork = 1;
vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
vm2->vm_taddr = vm1->vm_taddr;
@ -2488,7 +2483,6 @@ vmspace_fork(struct vmspace *vm1)
old_entry = old_entry->next;
}
old_map->infork = 0;
vm_map_unlock(old_map);
return (vm2);
@ -3027,6 +3021,108 @@ RetryLookup:;
#undef RETURN
}
/*
 *	vm_map_lookup_locked:
 *
 *	Non-blocking counterpart of vm_map_lookup.  Resolves the faulting
 *	address to a map entry, backing object, page index and effective
 *	protection, but returns KERN_FAILURE in every situation where the
 *	entry would have to be modified or a different map consulted
 *	(submap, write fault on a needs-copy entry, missing object).
 *
 *	This routine acquires no locks itself; presumably the caller
 *	already holds the map lock (TODO confirm against callers).
 *	*var_map is only read here, never updated.
 *
 *	Returns:
 *	  KERN_SUCCESS            - *object, *pindex, *out_prot, *wired and
 *	                            *out_entry describe the mapping.
 *	  KERN_INVALID_ADDRESS    - no entry contains vaddr.
 *	  KERN_PROTECTION_FAILURE - the requested access is not permitted.
 *	  KERN_FAILURE            - the lookup cannot be completed here.
 */
int
vm_map_lookup_locked(vm_map_t *var_map,		/* IN/OUT */
		     vm_offset_t vaddr,
		     vm_prot_t fault_typea,
		     vm_map_entry_t *out_entry,	/* OUT */
		     vm_object_t *object,	/* OUT */
		     vm_pindex_t *pindex,	/* OUT */
		     vm_prot_t *out_prot,	/* OUT */
		     boolean_t *wired)		/* OUT */
{
	vm_map_t map = *var_map;
	vm_map_entry_t ent;
	vm_prot_t allowed;
	vm_prot_t wanted;

	/*
	 * The root of the entry tree doubles as a lookup hint.  Only when
	 * it is absent or does not cover vaddr do we pay for the full
	 * search.  *out_entry is written with the hint up front, so it is
	 * set even on the early-return paths.
	 */
	ent = map->root;
	*out_entry = ent;
	if (!(ent != NULL && vaddr >= ent->start && vaddr < ent->end)) {
		if (!vm_map_lookup_entry(map, vaddr, out_entry))
			return (KERN_INVALID_ADDRESS);
		ent = *out_entry;
	}

	/*
	 * Submap entries are not followed by this routine.
	 */
	if (ent->eflags & MAP_ENTRY_IS_SUB_MAP)
		return (KERN_FAILURE);

	/*
	 * Pick the protection to check against.  A fault carrying
	 * VM_PROT_OVERRIDE_WRITE is checked against the entry's maximum
	 * protection rather than its current one (forced COW for
	 * debuggers).
	 */
	allowed = (fault_typea & VM_PROT_OVERRIDE_WRITE) ?
	    ent->max_protection : ent->protection;

	/*
	 * Reduce the request to the plain access bits and make sure every
	 * one of them is permitted.
	 */
	wanted = fault_typea &
	    (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
	if ((wanted & allowed) != wanted)
		return (KERN_PROTECTION_FAILURE);

	/*
	 * A write to a user-wired copy-on-write entry is refused unless
	 * the caller explicitly asked for the write override.
	 */
	if ((wanted & VM_PROT_WRITE) &&
	    (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0 &&
	    (ent->eflags & MAP_ENTRY_USER_WIRED) &&
	    (ent->eflags & MAP_ENTRY_COW))
		return (KERN_PROTECTION_FAILURE);

	/*
	 * For a wired (non-pageable) entry the page must be obtained for
	 * every access the entry permits, so both the request and the
	 * reported protection become the entry's current protection.
	 */
	*wired = (ent->wired_count != 0);
	if (*wired) {
		wanted = ent->protection;
		allowed = wanted;
	}

	if (ent->eflags & MAP_ENTRY_NEEDS_COPY) {
		/*
		 * A write fault on a needs-copy entry cannot be resolved
		 * by this routine.
		 */
		if (wanted & VM_PROT_WRITE)
			return (KERN_FAILURE);

		/*
		 * Read access to a needs-copy entry: report the mapping
		 * without write permission.
		 */
		allowed &= ~VM_PROT_WRITE;
	}

	/*
	 * An entry with no backing object in a non-system map would need
	 * an object created; that is not done here.
	 */
	if (ent->object.vm_object == NULL && !map->system_map)
		return (KERN_FAILURE);

	/*
	 * Report the object and the page index of vaddr within it.  No
	 * fix-up of the entry is performed by this routine.
	 */
	*pindex = OFF_TO_IDX((vaddr - ent->start) + ent->offset);
	*object = ent->object.vm_object;
	*out_prot = allowed;
	return (KERN_SUCCESS);
}
/*
* vm_map_lookup_done:
*

View File

@ -185,7 +185,6 @@ struct vm_map {
u_int timestamp; /* Version number */
u_char needs_wakeup;
u_char system_map; /* Am I a system map? */
u_char infork; /* Am I in fork processing? */
vm_flags_t flags; /* flags for this vm_map */
vm_map_entry_t root; /* Root of a binary search tree */
vm_map_entry_t first_free; /* First free space hint */
@ -341,6 +340,8 @@ void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t);
int vm_map_insert (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int);
int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *,
vm_pindex_t *, vm_prot_t *, boolean_t *);
int vm_map_lookup_locked(vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *,
vm_pindex_t *, vm_prot_t *, boolean_t *);
void vm_map_lookup_done (vm_map_t, vm_map_entry_t);
boolean_t vm_map_lookup_entry (vm_map_t, vm_offset_t, vm_map_entry_t *);
void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,