1993-06-12 14:58:17 +00:00
|
|
|
/*-
|
1994-06-06 14:54:41 +00:00
|
|
|
* Copyright (C) 1994, David Greenman
|
|
|
|
* Copyright (c) 1990, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
2007-12-07 08:20:17 +00:00
|
|
|
* Copyright (c) 2007 The FreeBSD Foundation
|
1993-06-12 14:58:17 +00:00
|
|
|
*
|
|
|
|
* This code is derived from software contributed to Berkeley by
|
|
|
|
* the University of Utah, and William Jolitz.
|
|
|
|
*
|
2007-12-07 08:20:17 +00:00
|
|
|
* Portions of this software were developed by A. Joseph Koshy under
|
|
|
|
* sponsorship from the FreeBSD Foundation and Google, Inc.
|
|
|
|
*
|
1993-06-12 14:58:17 +00:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
* This product includes software developed by the University of
|
|
|
|
* California, Berkeley and its contributors.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
genassym.c:
Remove NKMEMCLUSTERS, it is no longer defined or used.
locores.s:
Fix comment on PTDpde and APTDpde to be pde instead of pte
Add new equation for calculating location of Sysmap
Remove Bill's old #ifdef garbage for counting up memory,
that stuff will never be made to work and was just cluttering
up the file.
Add code that places the PTD, page table pages, and kernel
stack below the 640k ISA hole if there is room for it, otherwise
put this stuff all at 1MB. This fixes the 28K bogusity in
the boot blocks, that can now go away!
Fix the calculation of where first is to be dependent on
NKPDE so that we can skip over the above mentioned areas.
The 28K thing is now 44K in size due to the increase in
kernel virtual memory space, but since we no longer have
to worry about that this is no big deal.
Use if NNPX > 0 instead of ifdef NPX for floating point code.
machdep.c
Change the calculation for the buffer cache to be
20% of all memory above 2MB and add back the upper limit
of 2/5's of the VM_KMEM_SIZE so that we do not eat ALL
of the kernel memory space on large memory machines, note
that this will not even come into effect unless you have
more than 32MB. The current buffer cache limit is 6.7MB
due to this calculation.
It seems that we were erroneously allocating bufpages pages
for buffer_map. buffer_map is UNUSED in this implementation
of the buffer cache, but since the map is referenced in
several if statements a quick fix was to simply allocate
1 vm page (but no real memory) to it.
pmap.h
Remove rcsid, don't want them in the kernel files!
Removed some cruft inside an #ifdef DEBUGx that caused
compiler errors if you were compiling this for debug.
Use the #defines for PD_SHIFT and PG_SHIFT in place of
constants.
trap.c:
Remove patch kit header and rcsid, fix $Id$.
Now include "npx.h" and use NNPX for controlling the
floating point code.
Remove a now completely invalid check for a maximum virtual
address, the virtual address now ends at 0xFFFFFFFF so
there is no more MAX!! (Thanks David, I completely missed
that one!)
vm_machdep.c
Remove patch kit header and rcsid, fix $Id$.
Now include "npx.h" and use NNPX for controlling the
floating point code.
Replace several 0xFE00000 constants with KERNBASE
1993-10-15 10:34:29 +00:00
|
|
|
* from: @(#)trap.c 7.4 (Berkeley) 5/13/91
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
|
|
|
|
2003-06-11 00:56:59 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2003-07-31 01:36:24 +00:00
|
|
|
#include "opt_ktrace.h"
|
2010-06-17 17:34:45 +00:00
|
|
|
#include "opt_kdtrace.h"
|
2007-06-12 23:27:31 +00:00
|
|
|
#include "opt_sched.h"
|
1996-01-03 21:42:35 +00:00
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
#include <sys/param.h>
|
2000-09-07 01:33:02 +00:00
|
|
|
#include <sys/bus.h>
|
1994-05-25 09:21:21 +00:00
|
|
|
#include <sys/kernel.h>
|
2001-06-29 19:51:37 +00:00
|
|
|
#include <sys/lock.h>
|
2000-10-20 07:58:15 +00:00
|
|
|
#include <sys/mutex.h>
|
2007-12-07 08:20:17 +00:00
|
|
|
#include <sys/pmckern.h>
|
2001-06-29 19:51:37 +00:00
|
|
|
#include <sys/proc.h>
|
Part 1 of KSE-III
The ability to schedule multiple threads per process
(on one cpu) by making ALL system calls optionally asynchronous.
to come: ia64 and power-pc patches, patches for gdb, test program (in tools)
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff.
expect slight instability in signals..
2002-06-29 17:26:22 +00:00
|
|
|
#include <sys/ktr.h>
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-dependent
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#include <sys/pioctl.h>
|
|
|
|
#include <sys/ptrace.h>
|
1997-11-24 13:25:37 +00:00
|
|
|
#include <sys/resourcevar.h>
|
2002-10-12 05:32:24 +00:00
|
|
|
#include <sys/sched.h>
|
1997-11-24 13:25:37 +00:00
|
|
|
#include <sys/signalvar.h>
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-dependent
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#include <sys/syscall.h>
|
2010-06-30 18:03:42 +00:00
|
|
|
#include <sys/syscallsubr.h>
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-dependent
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#include <sys/sysent.h>
|
2001-06-29 19:51:37 +00:00
|
|
|
#include <sys/systm.h>
|
1995-12-07 12:48:31 +00:00
|
|
|
#include <sys/vmmeter.h>
|
2003-07-31 01:36:24 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
#include <sys/uio.h>
|
|
|
|
#include <sys/ktrace.h>
|
|
|
|
#endif
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-dependent
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#include <security/audit/audit.h>
|
2003-07-31 01:36:24 +00:00
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
#include <machine/cpu.h>
|
2000-12-12 01:14:32 +00:00
|
|
|
|
2008-10-19 01:35:27 +00:00
|
|
|
#ifdef XEN
|
|
|
|
#include <vm/vm.h>
|
|
|
|
#include <vm/vm_param.h>
|
|
|
|
#include <vm/pmap.h>
|
|
|
|
#endif
|
|
|
|
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
2001-06-29 19:51:37 +00:00
|
|
|
/*
|
2007-03-04 22:36:48 +00:00
|
|
|
* Define the code needed before returning to user mode, for trap and
|
|
|
|
* syscall.
*
* td is the thread that is about to return to user mode and frame is
* its trap frame.  Within this function frame is only read to obtain
* the PC that profiling time is charged against.
|
2001-06-29 19:51:37 +00:00
|
|
|
*/
|
2001-01-24 09:53:49 +00:00
|
|
|
void
|
2006-02-08 08:09:17 +00:00
|
|
|
userret(struct thread *td, struct trapframe *frame)
|
1994-06-06 14:54:41 +00:00
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
struct proc *p = td->td_proc;
|
1994-06-06 14:54:41 +00:00
|
|
|
|
Part 1 of KSE-III
The ability to schedule multiple threads per process
(one one cpu) by making ALL system calls optionally asynchronous.
to come: ia64 and power-pc patches, patches for gdb, test program (in tools)
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff.
expect slight instability in signals..
2002-06-29 17:26:22 +00:00
|
|
|
CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
|
2007-11-14 06:51:33 +00:00
|
|
|
td->td_name);
|
Currently, when signal is delivered to the process and there is a thread
not blocking the signal, signal is placed on the thread sigqueue. If
the selected thread is in kernel executing thr_exit() or sigprocmask()
syscalls, then signal might be not delivered to usermode for arbitrary
amount of time, and for exiting thread it is lost.
Put process-directed signals to the process queue unconditionally,
selecting the thread to deliver the signal only by the thread returning
to usermode, since only then the thread can handle delivery of signal
reliably. For exiting thread or thread that has blocked some signals,
check whether the newly blocked signal is queued for the process, and
try to find a thread to wakeup for delivery, in reschedule_signal(). For
exiting thread, assume that all signals are blocked.
Change cursig() and postsig() to look both into the thread and process
signal queues. When there is a signal that thread returning to usermode
could consume, TDF_NEEDSIGCHK flag is not neccessary set now. Do
unlocked read of p_siglist and p_pendingcnt to check for queued signals.
Note that thread that has a signal unblocked might get spurious wakeup
and EINTR from the interruptible system call now, due to the possibility
of being selected by reschedule_signals(), while other thread returned
to usermode earlier and removed the signal from process queue. This
should not cause compliance issues, since the thread has not blocked a
signal and thus should be ready to receive it anyway.
Reported by: Justin Teller <justin.teller gmail com>
Reviewed by: davidxu, jilles
MFC after: 1 month
2009-10-11 16:49:30 +00:00
|
|
|
/* The DIAGNOSTIC signal-flag check below is compiled out by this #if 0. */
#if 0
|
2004-03-05 17:35:28 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
2003-02-01 12:17:09 +00:00
|
|
|
/* Check that we called signotify() enough. */
|
2001-06-22 23:05:11 +00:00
|
|
|
PROC_LOCK(p);
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_lock(td);
|
2003-03-31 22:49:17 +00:00
|
|
|
if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
|
2003-02-17 09:55:10 +00:00
|
|
|
(td->td_flags & TDF_ASTPENDING) == 0))
|
2002-08-22 14:36:03 +00:00
|
|
|
printf("failed to set signal flags properly for ast()\n");
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_unlock(td);
|
2001-08-10 22:53:32 +00:00
|
|
|
PROC_UNLOCK(p);
|
2002-04-04 17:49:48 +00:00
|
|
|
#endif
|
Currently, when signal is delivered to the process and there is a thread
not blocking the signal, signal is placed on the thread sigqueue. If
the selected thread is in kernel executing thr_exit() or sigprocmask()
syscalls, then signal might be not delivered to usermode for arbitrary
amount of time, and for exiting thread it is lost.
Put process-directed signals to the process queue unconditionally,
selecting the thread to deliver the signal only by the thread returning
to usermode, since only then the thread can handle delivery of signal
reliably. For exiting thread or thread that has blocked some signals,
check whether the newly blocked signal is queued for the process, and
try to find a thread to wakeup for delivery, in reschedule_signal(). For
exiting thread, assume that all signals are blocked.
Change cursig() and postsig() to look both into the thread and process
signal queues. When there is a signal that thread returning to usermode
could consume, TDF_NEEDSIGCHK flag is not neccessary set now. Do
unlocked read of p_siglist and p_pendingcnt to check for queued signals.
Note that thread that has a signal unblocked might get spurious wakeup
and EINTR from the interruptible system call now, due to the possibility
of being selected by reschedule_signals(), while other thread returned
to usermode earlier and removed the signal from process queue. This
should not cause compliance issues, since the thread has not blocked a
signal and thus should be ready to receive it anyway.
Reported by: Justin Teller <justin.teller gmail com>
Reviewed by: davidxu, jilles
MFC after: 1 month
2009-10-11 16:49:30 +00:00
|
|
|
#endif
|
Moderate rewrite of kernel ktrace code to attempt to generally improve
reliability when tracing fast-moving processes or writing traces to
slow file systems by avoiding unbounded queueuing and dropped records.
Record loss was previously possible when the global pool of records
become depleted as a result of record generation outstripping record
commit, which occurred quickly in many common situations.
These changes partially restore the 4.x model of committing ktrace
records at the point of trace generation (synchronous), but maintain
the 5.x deferred record commit behavior (asynchronous) for situations
where entering VFS and sleeping is not possible (i.e., in the
scheduler). Records are now queued per-process as opposed to
globally, with processes responsible for committing records from their
own context as required.
- Eliminate the ktrace worker thread and global record queue, as they
are no longer used. Keep the global free record list, as records
are still used.
- Add a per-process record queue, which will hold any asynchronously
generated records, such as from context switches. This replaces the
global queue as the place to submit asynchronous records to.
- When a record is committed asynchronously, simply queue it to the
process.
- When a record is committed synchronously, first drain any pending
per-process records in order to maintain ordering as best we can.
Currently ordering between competing threads is provided via a global
ktrace_sx, but a per-process flag or lock may be desirable in the
future.
- When a process returns to user space following a system call, trap,
signal delivery, etc, flush any pending records.
- When a process exits, flush any pending records.
- Assert on process tear-down that there are no pending records.
- Slightly abstract the notion of being "in ktrace", which is used to
prevent the recursive generation of records, as well as generating
traces for ktrace events.
Future work here might look at changing the set of events marked for
synchronous and asynchronous record generation, re-balancing queue
depth, timeliness of commit to disk, and so on. I.e., performing a
drain every (n) records.
MFC after: 1 month
Discussed with: jhb
Requested by: Marc Olzheim <marcolz at stack dot nl>
2005-11-13 13:27:44 +00:00
|
|
|
/*
 * KTRACE kernels only.  NOTE(review): KTRUSERRET is defined elsewhere;
 * presumably it flushes pending ktrace records before the return to
 * user space -- confirm against its definition.
 */
#ifdef KTRACE
|
|
|
|
KTRUSERRET(td);
|
|
|
|
#endif
|
2004-10-23 20:49:17 +00:00
|
|
|
/*
|
|
|
|
* If this thread tickled GEOM, we need to wait for the giggling to
|
|
|
|
* stop before we return to userland
|
|
|
|
*/
|
|
|
|
if (td->td_pflags & TDP_GEOM)
|
|
|
|
g_waitidle();
|
|
|
|
|
2003-02-01 12:17:09 +00:00
|
|
|
/*
|
|
|
|
* Charge system time if profiling.
|
|
|
|
*/
|
2010-09-28 01:36:01 +00:00
|
|
|
if (p->p_flag & P_PROFIL)
|
2006-02-08 08:09:17 +00:00
|
|
|
addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
|
2004-12-26 07:30:35 +00:00
|
|
|
/*
|
|
|
|
* Let the scheduler adjust our priority etc.
|
|
|
|
*/
|
|
|
|
sched_userret(td);
|
2005-03-24 09:35:38 +00:00
|
|
|
/* Assert that td_locks is zero at this point of the return path. */
KASSERT(td->td_locks == 0,
|
|
|
|
("userret: Returning with %d locks held.", td->td_locks));
|
2008-10-19 01:35:27 +00:00
|
|
|
/*
 * XEN kernels only.  NOTE(review): PT_UPDATES_FLUSH() is defined
 * elsewhere; presumably it pushes out queued page-table updates --
 * confirm against its definition.
 */
#ifdef XEN
|
|
|
|
PT_UPDATES_FLUSH();
|
|
|
|
#endif
|
1994-06-06 14:54:41 +00:00
|
|
|
}
|
1993-06-12 14:58:17 +00:00
|
|
|
|
|
|
|
/*
|
2001-06-29 19:51:37 +00:00
|
|
|
* Process an asynchronous software trap.
|
|
|
|
* This is relatively easy.
|
2001-08-10 22:53:32 +00:00
|
|
|
* This function will return with preemption disabled.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2000-09-07 01:33:02 +00:00
|
|
|
void
|
Part 1 of KSE-III
The ability to schedule multiple threads per process
(on one cpu) by making ALL system calls optionally asynchronous.
to come: ia64 and power-pc patches, patches for gdb, test program (in tools)
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff.
expect slight instability in signals..
2002-06-29 17:26:22 +00:00
|
|
|
ast(struct trapframe *framep)
|
2000-09-07 01:33:02 +00:00
|
|
|
{
|
2002-10-01 14:16:50 +00:00
|
|
|
struct thread *td;
|
|
|
|
struct proc *p;
|
2001-09-12 08:38:13 +00:00
|
|
|
int flags;
|
2002-04-04 17:49:48 +00:00
|
|
|
int sig;
|
2000-09-07 01:33:02 +00:00
|
|
|
|
2002-10-01 14:16:50 +00:00
|
|
|
td = curthread;
|
|
|
|
p = td->td_proc;
|
2002-10-02 16:39:39 +00:00
|
|
|
|
Part 1 of KSE-III
The ability to schedule multiple threads per process
(on one cpu) by making ALL system calls optionally asynchronous.
to come: ia64 and power-pc patches, patches for gdb, test program (in tools)
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff.
expect slight instability in signals..
2002-06-29 17:26:22 +00:00
|
|
|
CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
|
|
|
|
p->p_comm);
|
2001-02-22 18:05:15 +00:00
|
|
|
KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
|
2003-03-04 21:03:05 +00:00
|
|
|
WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
|
2001-08-10 22:53:32 +00:00
|
|
|
mtx_assert(&Giant, MA_NOTOWNED);
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
synchronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
|
2002-03-29 16:45:03 +00:00
|
|
|
td->td_frame = framep;
|
2006-02-08 08:09:17 +00:00
|
|
|
td->td_pticks = 0;
|
2002-10-01 14:16:50 +00:00
|
|
|
|
2002-03-29 16:45:03 +00:00
|
|
|
/*
|
2007-09-17 05:31:39 +00:00
|
|
|
* This updates the td_flag's for the checks below in one
|
2002-03-29 16:45:03 +00:00
|
|
|
* "atomic" operation with turning off the astpending flag.
|
|
|
|
* If another AST is triggered while we are handling the
|
2007-09-17 05:31:39 +00:00
|
|
|
* AST's saved in flags, the astpending flag will be set and
|
2002-03-29 16:45:03 +00:00
|
|
|
* ast() will be called again.
|
|
|
|
*/
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
synchronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_lock(td);
|
|
|
|
flags = td->td_flags;
|
2008-03-21 08:23:25 +00:00
|
|
|
td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
|
|
|
|
TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_unlock(td);
|
2007-06-04 21:45:18 +00:00
|
|
|
PCPU_INC(cnt.v_trap);
|
2004-09-22 15:24:33 +00:00
|
|
|
|
2002-03-29 16:45:03 +00:00
|
|
|
if (td->td_ucred != p->p_ucred)
|
|
|
|
cred_update_thread(td);
|
2004-07-16 21:04:55 +00:00
|
|
|
if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
|
|
|
|
addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
|
|
|
|
td->td_profil_ticks = 0;
|
|
|
|
td->td_pflags &= ~TDP_OWEUPC;
|
2004-07-02 03:50:48 +00:00
|
|
|
}
|
2007-09-17 05:31:39 +00:00
|
|
|
if (flags & TDF_ALRMPEND) {
|
2002-03-29 16:45:03 +00:00
|
|
|
PROC_LOCK(p);
|
|
|
|
psignal(p, SIGVTALRM);
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
2007-09-17 05:31:39 +00:00
|
|
|
if (flags & TDF_PROFPEND) {
|
2002-03-29 16:45:03 +00:00
|
|
|
PROC_LOCK(p);
|
|
|
|
psignal(p, SIGPROF);
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
2002-11-08 19:00:17 +00:00
|
|
|
#ifdef MAC
|
2007-09-17 05:31:39 +00:00
|
|
|
if (flags & TDF_MACPEND)
|
2002-11-08 19:00:17 +00:00
|
|
|
mac_thread_userret(td);
|
|
|
|
#endif
|
2003-02-17 09:55:10 +00:00
|
|
|
if (flags & TDF_NEEDRESCHED) {
|
2003-07-31 01:36:24 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_CSW))
|
2003-09-05 22:15:26 +00:00
|
|
|
ktrcsw(1, 1);
|
2003-07-31 01:36:24 +00:00
|
|
|
#endif
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_lock(td);
|
2006-10-26 21:42:22 +00:00
|
|
|
sched_prio(td, td->td_user_pri);
|
2008-04-17 04:20:10 +00:00
|
|
|
mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
|
Commit 14/14 of sched_lock decomposition.
- Use thread_lock() rather than sched_lock for per-thread scheduling
sychronization.
- Use the per-process spinlock rather than the sched_lock for per-process
scheduling synchronization.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-05 00:00:57 +00:00
|
|
|
thread_unlock(td);
|
2003-07-31 01:36:24 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_CSW))
|
2003-09-05 22:15:26 +00:00
|
|
|
ktrcsw(0, 1);
|
2003-07-31 01:36:24 +00:00
|
|
|
#endif
|
2002-04-04 17:49:48 +00:00
|
|
|
}
|
Currently, when signal is delivered to the process and there is a thread
not blocking the signal, signal is placed on the thread sigqueue. If
the selected thread is in kernel executing thr_exit() or sigprocmask()
syscalls, then signal might be not delivered to usermode for arbitrary
amount of time, and for exiting thread it is lost.
Put process-directed signals to the process queue unconditionally,
selecting the thread to deliver the signal only by the thread returning
to usermode, since only then the thread can handle delivery of signal
reliably. For exiting thread or thread that has blocked some signals,
check whether the newly blocked signal is queued for the process, and
try to find a thread to wakeup for delivery, in reschedule_signal(). For
exiting thread, assume that all signals are blocked.
Change cursig() and postsig() to look both into the thread and process
signal queues. When there is a signal that thread returning to usermode
could consume, TDF_NEEDSIGCHK flag is not necessarily set now. Do
unlocked read of p_siglist and p_pendingcnt to check for queued signals.
Note that thread that has a signal unblocked might get spurious wakeup
and EINTR from the interruptible system call now, due to the possibility
of being selected by reschedule_signals(), while other thread returned
to usermode earlier and removed the signal from process queue. This
should not cause compliance issues, since the thread has not blocked a
signal and thus should be ready to receive it anyway.
Reported by: Justin Teller <justin.teller gmail com>
Reviewed by: davidxu, jilles
MFC after: 1 month
2009-10-11 16:49:30 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for signals. Unlocked reads of p_pendingcnt or
|
|
|
|
* p_siglist might cause process-directed signal to be handled
|
|
|
|
* later.
|
|
|
|
*/
|
|
|
|
if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
|
|
|
|
!SIGISEMPTY(p->p_siglist)) {
|
2002-04-04 17:49:48 +00:00
|
|
|
PROC_LOCK(p);
|
- Merge struct procsig with struct sigacts.
- Move struct sigacts out of the u-area and malloc() it using the
M_SUBPROC malloc bucket.
- Add a small sigacts_*() API for managing sigacts structures: sigacts_alloc(),
sigacts_free(), sigacts_copy(), sigacts_share(), and sigacts_shared().
- Remove the p_sigignore, p_sigacts, and p_sigcatch macros.
- Add a mutex to struct sigacts that protects all the members of the struct.
- Add sigacts locking.
- Remove Giant from nosys(), kill(), killpg(), and kern_sigaction() now
that sigacts is locked.
- Several in-kernel functions such as psignal(), tdsignal(), trapsignal(),
and thread_stopped() are now MP safe.
Reviewed by: arch@
Approved by: re (rwatson)
2003-05-13 20:36:02 +00:00
|
|
|
mtx_lock(&p->p_sigacts->ps_mtx);
|
2009-07-14 22:52:46 +00:00
|
|
|
while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0)
|
2002-04-04 17:49:48 +00:00
|
|
|
postsig(sig);
|
- Merge struct procsig with struct sigacts.
- Move struct sigacts out of the u-area and malloc() it using the
M_SUBPROC malloc bucket.
- Add a small sigacts_*() API for managing sigacts structures: sigacts_alloc(),
sigacts_free(), sigacts_copy(), sigacts_share(), and sigacts_shared().
- Remove the p_sigignore, p_sigacts, and p_sigcatch macros.
- Add a mutex to struct sigacts that protects all the members of the struct.
- Add sigacts locking.
- Remove Giant from nosys(), kill(), killpg(), and kern_sigaction() now
that sigacts is locked.
- Several in-kernel functions such as psignal(), tdsignal(), trapsignal(),
and thread_stopped() are now MP safe.
Reviewed by: arch@
Approved by: re (rwatson)
2003-05-13 20:36:02 +00:00
|
|
|
mtx_unlock(&p->p_sigacts->ps_mtx);
|
2002-04-04 17:49:48 +00:00
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
2008-03-21 08:23:25 +00:00
|
|
|
/*
|
|
|
|
* We need to check to see if we have to exit or wait due to a
|
|
|
|
* single threading requirement or some other STOP condition.
|
|
|
|
*/
|
|
|
|
if (flags & TDF_NEEDSUSPCHK) {
|
|
|
|
PROC_LOCK(p);
|
|
|
|
thread_suspend_check(0);
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
1997-04-07 07:16:06 +00:00
|
|
|
|
2009-10-27 10:55:34 +00:00
|
|
|
if (td->td_pflags & TDP_OLDMASK) {
|
|
|
|
td->td_pflags &= ~TDP_OLDMASK;
|
|
|
|
kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
|
|
|
|
}
|
|
|
|
|
2006-02-08 08:09:17 +00:00
|
|
|
userret(td, framep);
|
2001-08-10 22:53:32 +00:00
|
|
|
mtx_assert(&Giant, MA_NOTOWNED);
|
1997-04-07 07:16:06 +00:00
|
|
|
}
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and sets up the return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
|
|
|
|
#ifdef HAVE_SYSCALL_ARGS_DEF
|
2010-05-26 15:39:43 +00:00
|
|
|
const char *
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
syscallname(struct proc *p, u_int code)
|
|
|
|
{
|
|
|
|
static const char unknown[] = "unknown";
|
2010-07-04 18:16:17 +00:00
|
|
|
struct sysentvec *sv;
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
|
2010-07-04 18:16:17 +00:00
|
|
|
sv = p->p_sysent;
|
|
|
|
if (sv->sv_syscallnames == NULL || code >= sv->sv_size)
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
return (unknown);
|
2010-07-04 18:16:17 +00:00
|
|
|
return (sv->sv_syscallnames[code]);
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
syscallenter(struct thread *td, struct syscall_args *sa)
|
|
|
|
{
|
|
|
|
struct proc *p;
|
|
|
|
int error, traced;
|
|
|
|
|
|
|
|
PCPU_INC(cnt.v_syscall);
|
|
|
|
p = td->td_proc;
|
|
|
|
|
|
|
|
td->td_pticks = 0;
|
|
|
|
if (td->td_ucred != p->p_ucred)
|
|
|
|
cred_update_thread(td);
|
|
|
|
if (p->p_flag & P_TRACED) {
|
|
|
|
traced = 1;
|
|
|
|
PROC_LOCK(p);
|
|
|
|
td->td_dbgflags &= ~TDB_USERWR;
|
|
|
|
td->td_dbgflags |= TDB_SCE;
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
} else
|
|
|
|
traced = 0;
|
|
|
|
error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_SYSCALL))
|
|
|
|
ktrsyscall(sa->code, sa->narg, sa->args);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
CTR6(KTR_SYSC,
|
|
|
|
"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
|
|
|
|
td, td->td_proc->p_pid, syscallname(p, sa->code),
|
|
|
|
sa->args[0], sa->args[1], sa->args[2]);
|
|
|
|
|
|
|
|
if (error == 0) {
|
|
|
|
STOPEVENT(p, S_SCE, sa->narg);
|
|
|
|
PTRACESTOP_SC(p, td, S_PT_SCE);
|
|
|
|
if (td->td_dbgflags & TDB_USERWR) {
|
|
|
|
/*
|
|
|
|
* Reread syscall number and arguments if
|
|
|
|
* debugger modified registers or memory.
|
|
|
|
*/
|
|
|
|
error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_SYSCALL))
|
|
|
|
ktrsyscall(sa->code, sa->narg, sa->args);
|
|
|
|
#endif
|
|
|
|
if (error != 0)
|
|
|
|
goto retval;
|
|
|
|
}
|
2010-06-28 18:06:46 +00:00
|
|
|
error = syscall_thread_enter(td, sa->callp);
|
|
|
|
if (error != 0)
|
|
|
|
goto retval;
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
|
|
|
|
#ifdef KDTRACE_HOOKS
|
|
|
|
/*
|
|
|
|
* If the systrace module has registered it's probe
|
|
|
|
* callback and if there is a probe active for the
|
|
|
|
* syscall 'entry', process the probe.
|
|
|
|
*/
|
|
|
|
if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
|
|
|
|
(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
|
2010-08-22 11:30:49 +00:00
|
|
|
sa->callp, sa->args, 0);
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
AUDIT_SYSCALL_ENTER(sa->code, td);
|
|
|
|
error = (sa->callp->sy_call)(td, sa->args);
|
|
|
|
AUDIT_SYSCALL_EXIT(error, td);
|
|
|
|
|
|
|
|
/* Save the latest error return value. */
|
|
|
|
td->td_errno = error;
|
|
|
|
|
|
|
|
#ifdef KDTRACE_HOOKS
|
|
|
|
/*
|
|
|
|
* If the systrace module has registered it's probe
|
|
|
|
* callback and if there is a probe active for the
|
|
|
|
* syscall 'return', process the probe.
|
|
|
|
*/
|
|
|
|
if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
|
|
|
|
(*systrace_probe_func)(sa->callp->sy_return, sa->code,
|
2010-08-22 11:30:49 +00:00
|
|
|
sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
#endif
|
2010-06-28 18:06:46 +00:00
|
|
|
syscall_thread_exit(td, sa->callp);
|
Reorganize syscall entry and leave handling.
Extend struct sysvec with three new elements:
sv_fetch_syscall_args - the method to fetch syscall arguments from
usermode into struct syscall_args. The structure is machine-depended
(this might be reconsidered after all architectures are converted).
sv_set_syscall_retval - the method to set a return value for usermode
from the syscall. It is a generalization of
cpu_set_syscall_retval(9) to allow ABIs to override the way to set a
return value.
sv_syscallnames - the table of syscall names.
Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding
the call to cpu_set_syscall_retval().
The new functions syscallenter(9) and syscallret(9) are provided that
use sv_*syscall* pointers and contain the common repeated code from
the syscall() implementations for the architecture-specific syscall
trap handlers.
Syscallenter() fetches arguments, calls syscall implementation from
ABI sysent table, and set up return frame. The end of syscall
bookkeeping is done by syscallret().
Take advantage of single place for MI syscall handling code and
implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and
PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the
thread is stopped at syscall entry or return point respectively. The
EXEC flag augments SCX and notifies debugger that the process address
space was changed by one of exec(2)-family syscalls.
The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are
changed to use syscallenter()/syscallret(). MIPS and arm are not
converted and use the mostly unchanged syscall() implementation.
Reviewed by: jhb, marcel, marius, nwhitehorn, stas
Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc),
stas (mips)
MFC after: 1 month
2010-05-23 18:32:02 +00:00
|
|
|
CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
|
|
|
|
p, error, td->td_retval[0], td->td_retval[1]);
|
|
|
|
}
|
|
|
|
retval:
|
|
|
|
if (traced) {
|
|
|
|
PROC_LOCK(p);
|
|
|
|
td->td_dbgflags &= ~TDB_SCE;
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
|
|
|
(p->p_sysent->sv_set_syscall_retval)(td, error);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
syscallret(struct thread *td, int error, struct syscall_args *sa __unused)
|
|
|
|
{
|
|
|
|
struct proc *p;
|
|
|
|
int traced;
|
|
|
|
|
|
|
|
p = td->td_proc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for misbehavior.
|
|
|
|
*/
|
|
|
|
WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
|
|
|
|
syscallname(p, sa->code));
|
|
|
|
KASSERT(td->td_critnest == 0,
|
|
|
|
("System call %s returning in a critical section",
|
|
|
|
syscallname(p, sa->code)));
|
|
|
|
KASSERT(td->td_locks == 0,
|
|
|
|
("System call %s returning with %d locks held",
|
|
|
|
syscallname(p, sa->code), td->td_locks));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handle reschedule and other end-of-syscall issues
|
|
|
|
*/
|
|
|
|
userret(td, td->td_frame);
|
|
|
|
|
|
|
|
CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s",
|
|
|
|
syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name);
|
|
|
|
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_SYSRET))
|
|
|
|
ktrsysret(sa->code, error, td->td_retval[0]);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (p->p_flag & P_TRACED) {
|
|
|
|
traced = 1;
|
|
|
|
PROC_LOCK(p);
|
|
|
|
td->td_dbgflags |= TDB_SCX;
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
} else
|
|
|
|
traced = 0;
|
|
|
|
/*
|
|
|
|
* This works because errno is findable through the
|
|
|
|
* register set. If we ever support an emulation where this
|
|
|
|
* is not the case, this code will need to be revisited.
|
|
|
|
*/
|
|
|
|
STOPEVENT(p, S_SCX, sa->code);
|
|
|
|
PTRACESTOP_SC(p, td, S_PT_SCX);
|
|
|
|
if (traced || (td->td_dbgflags & TDB_EXEC) != 0) {
|
|
|
|
PROC_LOCK(p);
|
|
|
|
td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC);
|
|
|
|
PROC_UNLOCK(p);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* HAVE_SYSCALL_ARGS_DEF */
|