Split the mlock() kernel code into two parts, mlock(), which unpacks
the syscall arguments and does the suser() permission check, and
kern_mlock(), which does the resource limit checking and calls
vm_map_wire().  Split munlock() in a similar way.

Enable the RLIMIT_MEMLOCK checking code in kern_mlock().

Replace calls to vslock() and vsunlock() in the sysctl code with
calls to kern_mlock() and kern_munlock() so that the sysctl code
will obey the wired memory limits.

Nuke the vslock() and vsunlock() implementations, which are no
longer used.

Add a member to struct sysctl_req to track the amount of memory
that is wired to handle the request.

Modify sysctl_wire_old_buffer() to return an error if its call to
kern_mlock() fails.  Only wire the minimum of the length specified
in the sysctl request and the length specified in its argument list.
It is recommended that sysctl handlers that use
sysctl_wire_old_buffer() should specify reasonable estimates for
the amount of data they want to return so that only the minimum
amount of memory is wired no matter what length has been specified
by the request.

Modify the callers of sysctl_wire_old_buffer() to look for the
error return.

Modify sysctl_old_user() to obey the wired buffer length and clean
up its implementation.

Reviewed by:	bms
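For illustration, the calling convention this change establishes for
handlers looks roughly like the sketch below. It is hypothetical, not
part of this commit: foo_count, foo_mtx, struct xfoo and fill_xfoo()
are made-up names; only sysctl_wire_old_buffer() and SYSCTL_OUT() are
real. The handler passes a size estimate and checks the error return
before emitting data, mirroring what the pcblist handlers in the diff
do with their 2 * sizeof(xig) + n * sizeof(...) estimates.

static int
sysctl_foo_list(SYSCTL_HANDLER_ARGS)
{
	struct xfoo xf;
	int error, i, n;

	n = foo_count;		/* unlocked snapshot, used only as an estimate */
	/*
	 * Wire no more than this handler expects to copy out; passing
	 * 0 would wire the full length supplied by the request instead.
	 */
	error = sysctl_wire_old_buffer(req, n * sizeof(struct xfoo));
	if (error != 0)
		return (error);	/* e.g. ENOMEM if RLIMIT_MEMLOCK is exceeded */
	mtx_lock(&foo_mtx);
	for (i = 0; i < n && error == 0; i++) {
		fill_xfoo(&xf, i);	/* hypothetical: snapshot entry i */
		error = SYSCTL_OUT(req, &xf, sizeof(xf));
	}
	mtx_unlock(&foo_mtx);
	return (error);
}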
commit 47934cef8f
parent 049ffe98a8
@@ -2327,7 +2327,9 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 	 * it is of a similar order of magnitude to the leakage from
 	 * global system statistics such as kern.openfiles.
 	 */
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
 	if (req->oldptr == NULL) {
 		n = 16;	/* A slight overestimate. */
 		sx_slock(&filelist_lock);
@@ -168,7 +168,9 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
 {
 	int error, pid;
 
-	sysctl_wire_old_buffer(req, sizeof(int));
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error != 0)
+		return(error);
 	sx_xlock(&allproc_lock);
 	pid = randompid;
 	error = sysctl_handle_int(oidp, &pid, 0, req);
@@ -1796,7 +1796,9 @@ sysctl_kern_function_list(SYSCTL_HANDLER_ARGS)
 	if (error)
 		return (error);
 #endif
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
 	mtx_lock(&kld_mtx);
 	TAILQ_FOREACH(lf, &linker_files, link) {
 		error = LINKER_EACH_FUNCTION_NAME(lf,
@@ -946,7 +946,9 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
 		if (error)
 			return (error);
 	}
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
 	sx_slock(&allproc_lock);
 	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
 		if (!doingzomb)
@@ -980,7 +980,8 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 	error = sysctl_root(0, name, namelen, &req);
 
 	if (req.lock == REQ_WIRED)
-		vsunlock(req.oldptr, req.oldlen);
+		kern_munlock(req.td, (vm_offset_t)req.oldptr,
+		    (vm_size_t)req.wiredlen);
 
 	SYSCTL_UNLOCK();
 
@@ -1025,26 +1026,27 @@ static int
 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 {
 	int error = 0;
-	size_t i = 0;
+	size_t i, len, origidx;
 
-	if (req->lock == REQ_LOCKED && req->oldptr)
+	origidx = req->oldidx;
+	req->oldidx += l;
+	if (req->oldptr == NULL)
+		return (0);
+	if (req->lock == REQ_LOCKED)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "sysctl_old_user()");
-	if (req->oldptr) {
-		i = l;
-		if (req->oldlen <= req->oldidx)
-			i = 0;
-		else
-			if (i > req->oldlen - req->oldidx)
-				i = req->oldlen - req->oldidx;
-		if (i > 0)
-			error = copyout(p, (char *)req->oldptr + req->oldidx,
-			    i);
+	i = l;
+	len = (req->lock == REQ_WIRED) ? req->wiredlen : req->oldlen;
+	if (len <= origidx)
+		i = 0;
+	else {
+		if (i > len - origidx)
+			i = len - origidx;
+		error = copyout(p, (char *)req->oldptr + origidx, i);
 	}
-	req->oldidx += l;
 	if (error)
 		return (error);
-	if (req->oldptr && i < l)
+	if (i < l)
 		return (ENOMEM);
 	return (0);
 }
@@ -1071,14 +1073,24 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
  * a place to save it in the sysctl_req structure so that the matching
  * amount of memory can be unwired in the sysctl exit code.
  */
-void
+int
 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
 {
+	int ret;
+	size_t wiredlen;
+
+	wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
+	ret = 0;
 	if (req->lock == REQ_LOCKED && req->oldptr &&
 	    req->oldfunc == sysctl_old_user) {
-		vslock(req->oldptr, req->oldlen);
-		req->lock = REQ_WIRED;
+		ret = kern_mlock(req->td, (vm_offset_t)req->oldptr,
+		    (vm_size_t)wiredlen);
+		if (ret == 0) {
+			req->lock = REQ_WIRED;
+			req->wiredlen = wiredlen;
+		}
 	}
+	return (ret);
 }
 
 int
@@ -1288,7 +1300,8 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 
 	req = req2;
 	if (req.lock == REQ_WIRED)
-		vsunlock(req.oldptr, req.oldlen);
+		kern_munlock(req.td, (vm_offset_t)req.oldptr,
+		    (vm_size_t)req.wiredlen);
 
 	SYSCTL_UNLOCK();
 
@@ -2897,7 +2897,9 @@ sysctl_vnode(SYSCTL_HANDLER_ARGS)
 		/* Make an estimate */
 		return (SYSCTL_OUT(req, 0, len));
 
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
 	xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK);
 	n = 0;
 	mtx_lock(&mountlist_mtx);
@@ -588,7 +588,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
 	n = divcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&divcbinfo);
 
-	sysctl_wire_old_buffer(req, 2 * sizeof(xig) + n*sizeof(struct xinpcb));
+	error = sysctl_wire_old_buffer(req,
+	    2 * sizeof(xig) + n*sizeof(struct xinpcb));
+	if (error != 0)
+		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
@@ -859,8 +859,10 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
 	INP_INFO_RUNLOCK(&tcbinfo);
 	splx(s);
 
-	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 	    + n * sizeof(struct xtcpcb));
+	if (error != 0)
+		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
@@ -604,8 +604,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
 	INP_INFO_RUNLOCK(&udbinfo);
 	splx(s);
 
-	sysctl_wire_old_buffer(req, 2 * (sizeof xig)
+	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 	    + n * sizeof(struct xinpcb));
+	if (error != 0)
+		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
@@ -647,8 +647,9 @@ ncp_sysctl_connstat(SYSCTL_HANDLER_ARGS)
 	struct ncp_conn *ncp;
 /*	struct ucred *cred = req->td->td_ucred;*/
 
-	error = 0;
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
 	ncp_conn_locklist(LK_SHARED, req->td);
 	error = SYSCTL_OUT(req, &ncp_conn_cnt, sizeof(ncp_conn_cnt));
 	SLIST_FOREACH(ncp, &conn_list, nc_next) {
@@ -855,7 +855,9 @@ smb_sysctl_treedump(SYSCTL_HANDLER_ARGS)
 	int error, itype;
 
 	smb_makescred(&scred, td, td->td_ucred);
-	sysctl_wire_old_buffer(req, 0);
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error)
+		return (error);
 	error = smb_sm_lockvclist(LK_SHARED, td);
 	if (error)
 		return error;
@@ -139,6 +139,7 @@ struct sysctl_req {
 	size_t	newlen;
 	size_t	newidx;
 	int	(*newfunc)(struct sysctl_req *, void *, size_t);
+	size_t	wiredlen;
 };

 SLIST_HEAD(sysctl_oid_list, sysctl_oid);
@@ -623,7 +624,7 @@ int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
 	    size_t *retval);
 int	sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
 	    int *nindx, struct sysctl_req *req);
-void	sysctl_wire_old_buffer(struct sysctl_req *req, size_t len);
+int	sysctl_wire_old_buffer(struct sysctl_req *req, size_t len);

 #else	/* !_KERNEL */
 #include <sys/cdefs.h>
@@ -59,6 +59,8 @@ int sstk(struct thread *, void *, int *);
 int swapon(struct thread *, void *, int *);
 #endif			/* TYPEDEF_FOR_UAP */

+int kern_mlock(struct thread *, vm_offset_t, vm_size_t);
+int kern_munlock(struct thread *, vm_offset_t, vm_size_t);
 int kernacc(void *, int, int);
 vm_offset_t kmem_alloc(vm_map_t, vm_size_t);
 vm_offset_t kmem_alloc_nofault(vm_map_t, vm_size_t);
@@ -86,8 +88,6 @@ void vmspace_unshare(struct proc *);
 void vmspace_free(struct vmspace *);
 void vmspace_exitfree(struct proc *);
 void vnode_pager_setsize(struct vnode *, vm_ooffset_t);
-void vslock(void *, u_int);
-void vsunlock(void *, u_int);
 void vm_object_print(/* db_expr_t */ long, boolean_t, /* db_expr_t */ long,
     char *);
 int vm_fault_quick(caddr_t v, int prot);
@@ -183,35 +183,6 @@ useracc(addr, len, rw)
 	return (rv == TRUE);
 }

-/*
- * MPSAFE
- */
-void
-vslock(addr, len)
-	void *addr;
-	u_int len;
-{
-
-	vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
-	    round_page((vm_offset_t)addr + len),
-	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
-}
-
-/*
- * MPSAFE
- */
-void
-vsunlock(addr, len)
-	void *addr;
-	u_int len;
-{
-
-	vm_map_unwire(&curproc->p_vmspace->vm_map,
-	    trunc_page((vm_offset_t)addr),
-	    round_page((vm_offset_t)addr + len),
-	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
-}
-
 /*
  * Create the U area for a new process.
  * This routine directly affects the fork perf for a process.
@@ -984,12 +984,26 @@ mlock(td, uap)
 	struct thread *td;
 	struct mlock_args *uap;
 {
-	vm_offset_t addr;
-	vm_size_t size, pageoff;
 	int error;

-	addr = (vm_offset_t) uap->addr;
-	size = uap->len;
+	error = suser(td);
+	if (error)
+		return (error);
+	return (kern_mlock(td, (vm_offset_t)uap->addr, (vm_size_t)uap->len));
+}
+
+/*
+ * MPSAFE
+ */
+int
+kern_mlock(td, addr, size)
+	struct thread *td;
+	vm_offset_t addr;
+	vm_size_t size;
+{
+	vm_size_t pageoff;
+	struct proc *proc = td->td_proc;
+	int error;

 	pageoff = (addr & PAGE_MASK);
 	addr -= pageoff;
@@ -1003,21 +1017,15 @@ mlock(td, uap)
 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
 		return (EAGAIN);

-#if 0
-	PROC_LOCK(td->td_proc);
-	if (size + ptoa(pmap_wired_count(vm_map_pmap(&td->td_proc->p_vmspace->vm_map))) >
-	    lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
-		PROC_UNLOCK(td->td_proc);
+	PROC_LOCK(proc);
+	if (size + ptoa(pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
+	    lim_cur(proc, RLIMIT_MEMLOCK)) {
+		PROC_UNLOCK(proc);
 		return (ENOMEM);
 	}
-	PROC_UNLOCK(td->td_proc);
-#else
-	error = suser(td);
-	if (error)
-		return (error);
-#endif
+	PROC_UNLOCK(proc);

-	error = vm_map_wire(&td->td_proc->p_vmspace->vm_map, addr,
+	error = vm_map_wire(&proc->p_vmspace->vm_map, addr,
 	    addr + size, VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
 }
@@ -1133,12 +1141,25 @@ munlock(td, uap)
 	struct thread *td;
 	struct munlock_args *uap;
 {
-	vm_offset_t addr;
-	vm_size_t size, pageoff;
 	int error;

-	addr = (vm_offset_t) uap->addr;
-	size = uap->len;
+	error = suser(td);
+	if (error)
+		return (error);
+	return (kern_munlock(td, (vm_offset_t)uap->addr, (vm_size_t)uap->len));
+}
+
+/*
+ * MPSAFE
+ */
+int
+kern_munlock(td, addr, size)
+	struct thread *td;
+	vm_offset_t addr;
+	vm_size_t size;
+{
+	vm_size_t pageoff;
+	int error;

 	pageoff = (addr & PAGE_MASK);
 	addr -= pageoff;
@@ -1149,10 +1170,6 @@ munlock(td, uap)
 	if (addr + size < addr)
 		return (EINVAL);

-	error = suser(td);
-	if (error)
-		return (error);
-
 	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, addr,
 	    addr + size, VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
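A minimal userland sketch of the user-visible effect, assuming a kernel
with this change: since sysctl output buffers are now wired against
RLIMIT_MEMLOCK, a request whose buffer exceeds the limit should fail
with ENOMEM rather than silently wiring the memory. The limit values
and the 16 MB buffer size are illustrative, not from the commit.

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdlib.h>

int
main(void)
{
	struct rlimit rl = { 4096, 4096 };	/* tiny wired-memory limit */
	size_t len = 16 * 1024 * 1024;		/* deliberately oversized buffer */
	void *buf;

	if (setrlimit(RLIMIT_MEMLOCK, &rl) == -1)
		err(1, "setrlimit");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	/*
	 * kern.file wires min(oldlen, handler estimate); with the limit
	 * above, expect this to fail with ENOMEM.
	 */
	if (sysctlbyname("kern.file", buf, &len, NULL, 0) == -1)
		warn("sysctlbyname");
	free(buf);
	return (0);
}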