From 269312019a13a34b73e2c761145aa9166105f4e5 Mon Sep 17 00:00:00 2001
From: David Greenman
Date: Tue, 27 Jul 1993 10:52:31 +0000
Subject: [PATCH] * Applied fixes from Bruce Evans to fix COW bugs, >1MB
 kernel loading, profiling, and various protection checks that cause
 security holes and system crashes.

* Changed min/max/bcmp/ffs/strlen to be static inline functions - included
  from cpufunc.h via systm.h. This change improves performance in many parts
  of the kernel - up to 5% in the networking layer alone. Note that systm.h
  must be included in any file that uses these functions; otherwise the
  symbols won't be found when the kernel is linked.

* Fixed incorrect call to splx() in if_is.c

* Fixed bogus assignment of the splx() return value in if_ed.c
---
 sys/amd64/amd64/locore.S     | 277 +++++++++++++++++++++--------------
 sys/amd64/amd64/locore.s     | 277 +++++++++++++++++++++--------------
 sys/amd64/amd64/machdep.c    |  45 +++++-
 sys/amd64/amd64/pmap.c       |   4 +-
 sys/amd64/amd64/trap.c       |  84 +++++++++--
 sys/amd64/amd64/vm_machdep.c |  13 +-
 sys/amd64/include/cpu.h      |   2 +
 sys/amd64/include/cpufunc.h  | 111 ++++++++++++++
 sys/ddb/db_output.c          |   4 +
 sys/dev/ed/if_ed.c           |   5 +-
 sys/dev/speaker/spkr.c       |   1 +
 sys/i386/i386/locore.s       | 277 +++++++++++++++++++++--------------
 sys/i386/i386/machdep.c      |  45 +++++-
 sys/i386/i386/pmap.c         |   4 +-
 sys/i386/i386/trap.c         |  84 +++++++++--
 sys/i386/i386/vm_machdep.c   |  13 +-
 sys/i386/include/cpu.h       |   2 +
 sys/i386/include/cpufunc.h   | 111 ++++++++++++++
 sys/i386/isa/if_ed.c         |   5 +-
 sys/i386/isa/if_is.c         |   3 +-
 sys/i386/isa/spkr.c          |   1 +
 sys/kern/subr_trap.c         |  84 +++++++++--
 22 files changed, 1057 insertions(+), 395 deletions(-)

diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index ef92a8c9c62c..74235a2f8bdb 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -89,9 +89,35 @@
 #define ALIGN_TEXT	.align 2,0x90	/* 4-byte boundaries, NOP-filled */
 #define SUPERALIGN_TEXT	.align 4,0x90	/* 16-byte boundaries better for 486 */
 
-#define LENTRY(name)	ALIGN_TEXT; .globl name; name:
-#define ENTRY(name)	ALIGN_TEXT; .globl _/**/name; _/**/name:
-#define ALTENTRY(name)	.globl _/**/name; _/**/name:
+#define GEN_ENTRY(name)		ALIGN_TEXT; .globl name; name:
+#define NON_GPROF_ENTRY(name)	GEN_ENTRY(_/**/name)
+
+#ifdef GPROF
+/*
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump
+ * over the mcounting.
+ */
+#define ALTENTRY(name)	GEN_ENTRY(_/**/name); MCOUNT; jmp 2f
+#define ENTRY(name)	GEN_ENTRY(_/**/name); MCOUNT; 2:
+/*
+ * The call to mcount supports the usual (bad) conventions.  We allocate
+ * some data and pass a pointer to it although 386BSD doesn't use
+ * the data.  We set up a frame before calling mcount because that is
+ * the standard convention although it makes work for both mcount and
+ * callers.
+ */
+#define MCOUNT	.data; ALIGN_DATA; 1:; .long 0; .text; \
+		pushl %ebp; movl %esp, %ebp; \
+		movl $1b,%eax; call mcount; popl %ebp
+#else
+/*
+ * ALTENTRY() has to align because it is before a corresponding ENTRY().
+ * ENTRY() has to align too because there may be no ALTENTRY() before it.
+ * If there is a previous ALTENTRY() then the alignment code is empty.
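+ * (For example, ALTENTRY(ovbcopy) immediately precedes ENTRY(bcopy)
+ * below, so the alignment emitted by that ENTRY() assembles to nothing.)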
+ */ +#define ALTENTRY(name) GEN_ENTRY(_/**/name) +#define ENTRY(name) GEN_ENTRY(_/**/name) +#endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ @@ -143,10 +169,11 @@ .globl _boothowto, _bootdev, _curpcb .globl __ucodesel,__udatasel - .globl _cpu, _cold, _atdevbase, _atdevphys + .globl _cpu, _cold, _atdevbase _cpu: .long 0 # are we 386, 386sx, or 486 _cold: .long 1 # cold till we are not _atdevbase: .long 0 # location of start of iomem in virtual + # .nonglobl _atdevphys (should be register or something) _atdevphys: .long 0 # location of device mapping ptes (phys) .globl _IdlePTD, _KPTphys @@ -158,7 +185,8 @@ _cyloffset: .long 0 _proc0paddr: .long 0 #ifdef SHOW_A_LOT -bit_colors: .byte GREEN,RED,0,0 +bit_colors: + .byte GREEN,RED,0,0 #endif .space 512 @@ -171,11 +199,11 @@ tmpstk: /*****************************************************************************/ /* - * start: jumped to directly from the boot blocks + * btext: beginning of text section. + * Also the entry point (jumped to directly from the boot blocks). */ -LENTRY(start) - movw $0x1234,%ax - movw %ax,0x472 # warm boot +ENTRY(btext) + movw $0x1234, 0x472 # warm boot jmp 1f .space 0x500 # skip over warm boot shit @@ -278,7 +306,10 @@ LENTRY(start) movl %esi,%ecx # this much memory, shrl $ PGSHIFT,%ecx # for this many pte s addl $ UPAGES+4,%ecx # including our early context - movl $0xa0,%ecx # XXX - cover debugger pages + cmpl $0xa0,%ecx # XXX - cover debugger pages + jae 1f + movl $0xa0,%ecx +1: movl $PG_V|PG_KW,%eax # having these bits set, lea (4*NBPG)(%esi),%ebx # physical address of KPT in proc 0, movl %ebx,_KPTphys-SYSTEM # in the kernel page table, @@ -372,9 +403,15 @@ LENTRY(start) /* load base of page directory, and enable mapping */ movl %esi,%eax # phys address of ptd in proc 0 - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 # load ptd addr into mmu movl %cr0,%eax # get control word +/* + * XXX it is now safe to always (attempt to) set CR0_WP and to set up + * the page tables assuming it works, so USE_486_WRITE_PROTECT will go + * away. The special 386 PTE checking needs to be conditional on + * whatever distingiushes 486-only kernels from 386-486 kernels. + */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax # and let s page! #else @@ -407,7 +444,7 @@ begin: /* now running relocated at SYSTEM where the system is linked to run */ lea 7*NBPG(%esi),%esi # skip past stack. pushl %esi - + /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax # adjust slots 9-17 @@ -423,9 +460,9 @@ reloc_gdt: 1: call _init386 # wire 386 chip for unix operation - + movl $0,_PTD - call _main # autoconfiguration, mountroot etc + call _main # autoconfiguration, mountroot etc popl %esi /* @@ -459,9 +496,11 @@ lretmsg1: /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); - * If the exec fails, process 1 exits and the system panics. + * If the execve fails, process 1 exits and the system panics. 
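+ * (The LCALL(0x7,0x0) sequences below enter the kernel through the
+ * system call gate in the LDT; the arguments are pushed first, then a
+ * junk word standing in for the return address a real call would push.)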
*/ -ENTRY(icode) +NON_GPROF_ENTRY(icode) + pushl $0 # envp + # pushl $argv-_icode # gas fucks up again movl $argv,%eax subl $_icode,%eax @@ -471,14 +510,17 @@ ENTRY(icode) movl $init,%eax subl $_icode,%eax pushl %eax - pushl %eax # dummy out rta - movl %esp,%ebp + pushl %eax # junk to fake return address + movl $exec,%eax LCALL(0x7,0x0) - pushl %eax + + pushl %eax # execve failed, the errno will do for an + # exit code because errnos are < 128 + pushl %eax # junk to fake return address + movl $exit,%eax - pushl %eax # dummy out rta LCALL(0x7,0x0) init: @@ -494,7 +536,7 @@ eicode: _szicode: .long _szicode-_icode -ENTRY(sigcode) +NON_GPROF_ENTRY(sigcode) call 12(%esp) lea 28(%esp),%eax # scp (the call may have clobbered the # copy at 8(%esp)) @@ -640,7 +682,7 @@ ENTRY(bzero) # void bzero(void *base, u_int cnt) movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax - shrl $2,%ecx + shrl $2,%ecx cld rep stosl @@ -664,6 +706,7 @@ ENTRY(fillw) # fillw (pat,base,cnt) ret ENTRY(bcopyb) +bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi @@ -672,7 +715,7 @@ ENTRY(bcopyb) cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ - rep + rep movsb popl %edi popl %esi @@ -693,6 +736,7 @@ ENTRY(bcopyb) ret ENTRY(bcopyw) +bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi @@ -735,20 +779,18 @@ ENTRY(bcopyw) ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax - je _bcopyw + je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax - jne _bcopyb - /* - * Fall through to bcopy. ENTRY() provides harmless fill bytes. - */ - + je bcopy + jmp bcopyb /* * (ov)bcopy (src,dst,cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ -ENTRY(bcopy) ALTENTRY(ovbcopy) +ENTRY(bcopy) +bcopy: pushl %esi pushl %edi movl 12(%esp),%esi @@ -789,8 +831,8 @@ ALTENTRY(ovbcopy) cld ret -ENTRY(ntohl) -ALTENTRY(htonl) +ALTENTRY(ntohl) +ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ @@ -798,7 +840,7 @@ ALTENTRY(htonl) * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ - .byte 0x0f + .byte 0x0f .byte 0xc8 #else xchgb %al,%ah @@ -807,8 +849,8 @@ ALTENTRY(htonl) #endif ret -ENTRY(ntohs) -ALTENTRY(htons) +ALTENTRY(ntohs) +ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret @@ -861,10 +903,6 @@ show_bits: * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ -/* - * XXX These routines load a segment register every time they execute. - * it would be nicer (faster) if they could depend on %gs. - */ ENTRY(copyout) # copyout (from_kernel, to_user, len) @@ -879,15 +917,36 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) orl %ebx, %ebx # nothing to do? jz done_copyout -#ifdef USE_486_WRITE_PROTECT - /* if WP bit in CR0 is set (n/a on 386), the hardware does the */ - /* write check. We just have to load the right segment selector */ - pushl %es - movl __udatasel, %eax - movl %ax, %es -#else /* USE_486_WRITE_PROTECT */ - /* we have to check each PTE for (write) permission */ + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + * + * Otherwise, it saves having to load and restore %es to get the + * usual segment-based protection (the destination segment for movs + * is always %es). The other explicit checks for user-writablility + * are not quite sufficient. 
They fail for the user area because + * we mapped the user area read/write to avoid having an #ifdef in + * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 + * addresses including 0xff800000 and 0xfc000000). I'm not sure if + * this can be fixed. Marking the PTEs supervisor mode and the + * PDE's user mode would almost work, but there may be a problem + * with the self-referential PDE. + */ + movl %edi, %eax + addl %ebx, %eax + jc copyout_fault +#define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ + cmpl $VM_END_USER_ADDRESS, %eax + ja copyout_fault +#ifndef USE_486_WRITE_PROTECT + /* + * We have to check each PTE for user write permission. + * The checking may cause a page fault, so it is important to set + * up everything for return via copyout_fault before here. + */ /* compute number of pages */ movl %edi, %ecx andl $0x0fff, %ecx @@ -906,7 +965,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $0x07, %al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07, %al je 2f - + /* simulate a trap */ pushl %edx pushl %ecx @@ -924,8 +983,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) addl $4, %edx decl %ecx jnz 1b /* check next page */ - -#endif /* USE_486_WRITE_PROTECT */ +#endif /* ndef USE_486_WRITE_PROTECT */ /* now copy it over */ /* bcopy (%esi, %edi, %ebx) */ @@ -938,9 +996,6 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $3, %cl rep movsb -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif done_copyout: popl %ebx @@ -951,10 +1006,8 @@ done_copyout: movl %eax,PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyout_fault: -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif popl %ebx popl %edi popl %esi @@ -972,22 +1025,19 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl 12(%esp),%esi # caddr_t from movl 16(%esp),%edi # caddr_t to movl 20(%esp),%ecx # size_t len - movl %ecx,%edx - pushl %ds - movl __udatasel,%ax # access 'from' via user data segment - movl %ax,%ds movb %cl,%al shrl $2,%ecx # copy longword-wise cld + gs rep movsl movb %al,%cl andb $3,%cl # copy remaining bytes + gs rep movsb - popl %ds popl %edi popl %esi xorl %eax, %eax @@ -995,8 +1045,8 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl %eax, PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyin_fault: - popl %ds popl %edi popl %esi movl _curpcb, %edx @@ -1007,10 +1057,8 @@ copyin_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory */ -ENTRY(fuword) ALTENTRY(fuiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1018,10 +1066,8 @@ ALTENTRY(fuiword) movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + ENTRY(fusword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1029,11 +1075,9 @@ ENTRY(fusword) movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - -ENTRY(fubyte) + ALTENTRY(fuibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1041,7 +1085,8 @@ ALTENTRY(fuibyte) movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + + ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax @@ -1056,42 +1101,39 @@ fusufault: /* * we only have to set the right segment selector. 
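 * (%gs now stays loaded with the user data selector, and trap() reloads
 * it if a user has clobbered it, so the per-call selector loads that
 * used to start each of the fu*() and su*() bodies are gone.)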
*/ -ENTRY(suword) ALTENTRY(suiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - + ENTRY(susword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - -ENTRY(subyte) + ALTENTRY(suibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret @@ -1102,8 +1144,8 @@ ALTENTRY(suibyte) */ # XXX - page boundary crossing is not handled yet +ALTENTRY(suibyte) ENTRY(subyte) -ENTRY(suibyte) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1123,6 +1165,7 @@ ENTRY(suibyte) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movb %al, (%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1149,14 +1192,15 @@ ENTRY(susword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movw %ax, (%edx) xorl %eax, %eax movl _curpcb, %ecx movl %eax, PCB_ONFAULT(%ecx) ret +ALTENTRY(suiword) ENTRY(suword) -ENTRY(suiword) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1176,6 +1220,7 @@ ENTRY(suiword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movl %eax, 0(%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1183,6 +1228,7 @@ ENTRY(suiword) ret #endif /* USE_486_WRITE_PROTECT */ + /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. @@ -1201,13 +1247,19 @@ ENTRY(copyoutstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: decl %edx jz 4f + /* + * gs override doesn't work for stosb. Use the same explicit check + * as in copyout(). It's much slower now because it is per-char. + * XXX - however, it would be faster to rewrite this function to use + * strlen() and copyout(). + */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt lodsb gs stosb @@ -1222,7 +1274,7 @@ ENTRY(copyoutstr) movl $ENAMETOOLONG, %eax jmp 6f -#else /* USE_486_WRITE_PROTECT */ +#else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi @@ -1234,6 +1286,13 @@ ENTRY(copyoutstr) movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen 1: + /* + * It suffices to check that the first byte is in user space, because + * we look at a page at a time and the end address is on a page + * boundary. 
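+	 * (The limit is page-aligned, so a string that starts below
+	 * VM_END_USER_ADDRESS cannot run past it within its first page.)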
+ */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt movl %edi, %eax shrl $IDXSHIFT, %eax andb $0xfc, %al @@ -1280,6 +1339,7 @@ ENTRY(copyoutstr) /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG, %eax jmp 6f + #endif /* USE_486_WRITE_PROTECT */ /* @@ -1298,8 +1358,6 @@ ENTRY(copyinstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: @@ -1447,13 +1505,12 @@ ENTRY(ssdtosd) # ssdtosd(*ssdp,*sdp) ENTRY(tlbflush) # tlbflush() movl %cr3,%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret -ENTRY(lcr0) # lcr0(cr0) -ALTENTRY(load_cr0) +ENTRY(load_cr0) # load_cr0(cr0) movl 4(%esp),%eax movl %eax,%cr0 ret @@ -1470,18 +1527,13 @@ ENTRY(rcr2) # rcr2() ENTRY(rcr3) # rcr3() -ALTENTRY(_cr3) movl %cr3,%eax ret - /* - * void lcr3(caddr_t cr3) - */ -ENTRY(lcr3) -ALTENTRY(load_cr3) +ENTRY(load_cr3) # void load_cr3(caddr_t cr3) movl 4(%esp),%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret @@ -1600,17 +1652,19 @@ sw0: .asciz "swtch" * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ -LENTRY(Idle) + ALIGN_TEXT +Idle: sti SHOW_STI + + ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 - hlt # wait for interrupt + hlt # wait for interrupt jmp idle_loop - SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ badsw: pushl $sw0 call _panic @@ -1619,6 +1673,7 @@ badsw: /* * Swtch() */ + SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH @@ -1780,7 +1835,7 @@ ENTRY(savectx) movl 4(%esp), %ecx movw _cpl, %ax movw %ax, PCB_IML(%ecx) - movl (%esp), %eax + movl (%esp), %eax movl %eax, PCB_EIP(%ecx) movl %ebx, PCB_EBX(%ecx) movl %esp, PCB_ESP(%ecx) @@ -1885,7 +1940,7 @@ L1: proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ - movl 12(%ebp),%ecx + movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret @@ -1903,7 +1958,7 @@ ENTRY(astoff) * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. - */ + */ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB @@ -2048,14 +2103,14 @@ bpttraps: testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) # non-kernel mode? jne calltrap # yes - call _kgdb_trap_glue + call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ - SUPERALIGN_TEXT + SUPERALIGN_TEXT IDTVEC(syscall) pushfl # only for stupid carry bit and more stupid wait3 cc kludge # XXX - also for direction flag (bzero, etc. clear it) diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index ef92a8c9c62c..74235a2f8bdb 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -89,9 +89,35 @@ #define ALIGN_TEXT .align 2,0x90 /* 4-byte boundaries, NOP-filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte boundaries better for 486 */ -#define LENTRY(name) ALIGN_TEXT; .globl name; name: -#define ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: -#define ALTENTRY(name) .globl _/**/name; _/**/name: +#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) + +#ifdef GPROF +/* + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump + * over the mcounting. 
+ */ +#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f +#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: +/* + * The call to mcount supports the usual (bad) conventions. We allocate + * some data and pass a pointer to it although the 386BSD doesn't use + * the data. We set up a frame before calling mcount because that is + * the standard convention although it makes work for both mcount and + * callers. + */ +#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ + pushl %ebp; movl %esp, %ebp; \ + movl $1b,%eax; call mcount; popl %ebp +#else +/* + * ALTENTRY() has to align because it is before a corresponding ENTRY(). + * ENTRY() has to align to because there may be no ALTENTRY() before it. + * If there is a previous ALTENTRY() then the alignment code is empty. + */ +#define ALTENTRY(name) GEN_ENTRY(_/**/name) +#define ENTRY(name) GEN_ENTRY(_/**/name) +#endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ @@ -143,10 +169,11 @@ .globl _boothowto, _bootdev, _curpcb .globl __ucodesel,__udatasel - .globl _cpu, _cold, _atdevbase, _atdevphys + .globl _cpu, _cold, _atdevbase _cpu: .long 0 # are we 386, 386sx, or 486 _cold: .long 1 # cold till we are not _atdevbase: .long 0 # location of start of iomem in virtual + # .nonglobl _atdevphys (should be register or something) _atdevphys: .long 0 # location of device mapping ptes (phys) .globl _IdlePTD, _KPTphys @@ -158,7 +185,8 @@ _cyloffset: .long 0 _proc0paddr: .long 0 #ifdef SHOW_A_LOT -bit_colors: .byte GREEN,RED,0,0 +bit_colors: + .byte GREEN,RED,0,0 #endif .space 512 @@ -171,11 +199,11 @@ tmpstk: /*****************************************************************************/ /* - * start: jumped to directly from the boot blocks + * btext: beginning of text section. + * Also the entry point (jumped to directly from the boot blocks). */ -LENTRY(start) - movw $0x1234,%ax - movw %ax,0x472 # warm boot +ENTRY(btext) + movw $0x1234, 0x472 # warm boot jmp 1f .space 0x500 # skip over warm boot shit @@ -278,7 +306,10 @@ LENTRY(start) movl %esi,%ecx # this much memory, shrl $ PGSHIFT,%ecx # for this many pte s addl $ UPAGES+4,%ecx # including our early context - movl $0xa0,%ecx # XXX - cover debugger pages + cmpl $0xa0,%ecx # XXX - cover debugger pages + jae 1f + movl $0xa0,%ecx +1: movl $PG_V|PG_KW,%eax # having these bits set, lea (4*NBPG)(%esi),%ebx # physical address of KPT in proc 0, movl %ebx,_KPTphys-SYSTEM # in the kernel page table, @@ -372,9 +403,15 @@ LENTRY(start) /* load base of page directory, and enable mapping */ movl %esi,%eax # phys address of ptd in proc 0 - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 # load ptd addr into mmu movl %cr0,%eax # get control word +/* + * XXX it is now safe to always (attempt to) set CR0_WP and to set up + * the page tables assuming it works, so USE_486_WRITE_PROTECT will go + * away. The special 386 PTE checking needs to be conditional on + * whatever distingiushes 486-only kernels from 386-486 kernels. + */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax # and let s page! #else @@ -407,7 +444,7 @@ begin: /* now running relocated at SYSTEM where the system is linked to run */ lea 7*NBPG(%esi),%esi # skip past stack. 
pushl %esi - + /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax # adjust slots 9-17 @@ -423,9 +460,9 @@ reloc_gdt: 1: call _init386 # wire 386 chip for unix operation - + movl $0,_PTD - call _main # autoconfiguration, mountroot etc + call _main # autoconfiguration, mountroot etc popl %esi /* @@ -459,9 +496,11 @@ lretmsg1: /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); - * If the exec fails, process 1 exits and the system panics. + * If the execve fails, process 1 exits and the system panics. */ -ENTRY(icode) +NON_GPROF_ENTRY(icode) + pushl $0 # envp + # pushl $argv-_icode # gas fucks up again movl $argv,%eax subl $_icode,%eax @@ -471,14 +510,17 @@ ENTRY(icode) movl $init,%eax subl $_icode,%eax pushl %eax - pushl %eax # dummy out rta - movl %esp,%ebp + pushl %eax # junk to fake return address + movl $exec,%eax LCALL(0x7,0x0) - pushl %eax + + pushl %eax # execve failed, the errno will do for an + # exit code because errnos are < 128 + pushl %eax # junk to fake return address + movl $exit,%eax - pushl %eax # dummy out rta LCALL(0x7,0x0) init: @@ -494,7 +536,7 @@ eicode: _szicode: .long _szicode-_icode -ENTRY(sigcode) +NON_GPROF_ENTRY(sigcode) call 12(%esp) lea 28(%esp),%eax # scp (the call may have clobbered the # copy at 8(%esp)) @@ -640,7 +682,7 @@ ENTRY(bzero) # void bzero(void *base, u_int cnt) movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax - shrl $2,%ecx + shrl $2,%ecx cld rep stosl @@ -664,6 +706,7 @@ ENTRY(fillw) # fillw (pat,base,cnt) ret ENTRY(bcopyb) +bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi @@ -672,7 +715,7 @@ ENTRY(bcopyb) cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ - rep + rep movsb popl %edi popl %esi @@ -693,6 +736,7 @@ ENTRY(bcopyb) ret ENTRY(bcopyw) +bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi @@ -735,20 +779,18 @@ ENTRY(bcopyw) ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax - je _bcopyw + je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax - jne _bcopyb - /* - * Fall through to bcopy. ENTRY() provides harmless fill bytes. - */ - + je bcopy + jmp bcopyb /* * (ov)bcopy (src,dst,cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ -ENTRY(bcopy) ALTENTRY(ovbcopy) +ENTRY(bcopy) +bcopy: pushl %esi pushl %edi movl 12(%esp),%esi @@ -789,8 +831,8 @@ ALTENTRY(ovbcopy) cld ret -ENTRY(ntohl) -ALTENTRY(htonl) +ALTENTRY(ntohl) +ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ @@ -798,7 +840,7 @@ ALTENTRY(htonl) * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ - .byte 0x0f + .byte 0x0f .byte 0xc8 #else xchgb %al,%ah @@ -807,8 +849,8 @@ ALTENTRY(htonl) #endif ret -ENTRY(ntohs) -ALTENTRY(htons) +ALTENTRY(ntohs) +ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret @@ -861,10 +903,6 @@ show_bits: * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ -/* - * XXX These routines load a segment register every time they execute. - * it would be nicer (faster) if they could depend on %gs. - */ ENTRY(copyout) # copyout (from_kernel, to_user, len) @@ -879,15 +917,36 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) orl %ebx, %ebx # nothing to do? jz done_copyout -#ifdef USE_486_WRITE_PROTECT - /* if WP bit in CR0 is set (n/a on 386), the hardware does the */ - /* write check. 
We just have to load the right segment selector */ - pushl %es - movl __udatasel, %eax - movl %ax, %es -#else /* USE_486_WRITE_PROTECT */ - /* we have to check each PTE for (write) permission */ + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + * + * Otherwise, it saves having to load and restore %es to get the + * usual segment-based protection (the destination segment for movs + * is always %es). The other explicit checks for user-writablility + * are not quite sufficient. They fail for the user area because + * we mapped the user area read/write to avoid having an #ifdef in + * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 + * addresses including 0xff800000 and 0xfc000000). I'm not sure if + * this can be fixed. Marking the PTEs supervisor mode and the + * PDE's user mode would almost work, but there may be a problem + * with the self-referential PDE. + */ + movl %edi, %eax + addl %ebx, %eax + jc copyout_fault +#define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ + cmpl $VM_END_USER_ADDRESS, %eax + ja copyout_fault +#ifndef USE_486_WRITE_PROTECT + /* + * We have to check each PTE for user write permission. + * The checking may cause a page fault, so it is important to set + * up everything for return via copyout_fault before here. + */ /* compute number of pages */ movl %edi, %ecx andl $0x0fff, %ecx @@ -906,7 +965,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $0x07, %al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07, %al je 2f - + /* simulate a trap */ pushl %edx pushl %ecx @@ -924,8 +983,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) addl $4, %edx decl %ecx jnz 1b /* check next page */ - -#endif /* USE_486_WRITE_PROTECT */ +#endif /* ndef USE_486_WRITE_PROTECT */ /* now copy it over */ /* bcopy (%esi, %edi, %ebx) */ @@ -938,9 +996,6 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $3, %cl rep movsb -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif done_copyout: popl %ebx @@ -951,10 +1006,8 @@ done_copyout: movl %eax,PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyout_fault: -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif popl %ebx popl %edi popl %esi @@ -972,22 +1025,19 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl 12(%esp),%esi # caddr_t from movl 16(%esp),%edi # caddr_t to movl 20(%esp),%ecx # size_t len - movl %ecx,%edx - pushl %ds - movl __udatasel,%ax # access 'from' via user data segment - movl %ax,%ds movb %cl,%al shrl $2,%ecx # copy longword-wise cld + gs rep movsl movb %al,%cl andb $3,%cl # copy remaining bytes + gs rep movsb - popl %ds popl %edi popl %esi xorl %eax, %eax @@ -995,8 +1045,8 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl %eax, PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyin_fault: - popl %ds popl %edi popl %esi movl _curpcb, %edx @@ -1007,10 +1057,8 @@ copyin_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory */ -ENTRY(fuword) ALTENTRY(fuiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1018,10 +1066,8 @@ ALTENTRY(fuiword) movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + ENTRY(fusword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1029,11 +1075,9 @@ ENTRY(fusword) movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - -ENTRY(fubyte) + 
ALTENTRY(fuibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1041,7 +1085,8 @@ ALTENTRY(fuibyte) movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + + ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax @@ -1056,42 +1101,39 @@ fusufault: /* * we only have to set the right segment selector. */ -ENTRY(suword) ALTENTRY(suiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - + ENTRY(susword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - -ENTRY(subyte) + ALTENTRY(suibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret @@ -1102,8 +1144,8 @@ ALTENTRY(suibyte) */ # XXX - page boundary crossing is not handled yet +ALTENTRY(suibyte) ENTRY(subyte) -ENTRY(suibyte) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1123,6 +1165,7 @@ ENTRY(suibyte) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movb %al, (%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1149,14 +1192,15 @@ ENTRY(susword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movw %ax, (%edx) xorl %eax, %eax movl _curpcb, %ecx movl %eax, PCB_ONFAULT(%ecx) ret +ALTENTRY(suiword) ENTRY(suword) -ENTRY(suiword) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1176,6 +1220,7 @@ ENTRY(suiword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movl %eax, 0(%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1183,6 +1228,7 @@ ENTRY(suiword) ret #endif /* USE_486_WRITE_PROTECT */ + /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. @@ -1201,13 +1247,19 @@ ENTRY(copyoutstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: decl %edx jz 4f + /* + * gs override doesn't work for stosb. Use the same explicit check + * as in copyout(). It's much slower now because it is per-char. + * XXX - however, it would be faster to rewrite this function to use + * strlen() and copyout(). + */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt lodsb gs stosb @@ -1222,7 +1274,7 @@ ENTRY(copyoutstr) movl $ENAMETOOLONG, %eax jmp 6f -#else /* USE_486_WRITE_PROTECT */ +#else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi @@ -1234,6 +1286,13 @@ ENTRY(copyoutstr) movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen 1: + /* + * It suffices to check that the first byte is in user space, because + * we look at a page at a time and the end address is on a page + * boundary. 
+ */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt movl %edi, %eax shrl $IDXSHIFT, %eax andb $0xfc, %al @@ -1280,6 +1339,7 @@ ENTRY(copyoutstr) /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG, %eax jmp 6f + #endif /* USE_486_WRITE_PROTECT */ /* @@ -1298,8 +1358,6 @@ ENTRY(copyinstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: @@ -1447,13 +1505,12 @@ ENTRY(ssdtosd) # ssdtosd(*ssdp,*sdp) ENTRY(tlbflush) # tlbflush() movl %cr3,%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret -ENTRY(lcr0) # lcr0(cr0) -ALTENTRY(load_cr0) +ENTRY(load_cr0) # load_cr0(cr0) movl 4(%esp),%eax movl %eax,%cr0 ret @@ -1470,18 +1527,13 @@ ENTRY(rcr2) # rcr2() ENTRY(rcr3) # rcr3() -ALTENTRY(_cr3) movl %cr3,%eax ret - /* - * void lcr3(caddr_t cr3) - */ -ENTRY(lcr3) -ALTENTRY(load_cr3) +ENTRY(load_cr3) # void load_cr3(caddr_t cr3) movl 4(%esp),%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret @@ -1600,17 +1652,19 @@ sw0: .asciz "swtch" * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ -LENTRY(Idle) + ALIGN_TEXT +Idle: sti SHOW_STI + + ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 - hlt # wait for interrupt + hlt # wait for interrupt jmp idle_loop - SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ badsw: pushl $sw0 call _panic @@ -1619,6 +1673,7 @@ badsw: /* * Swtch() */ + SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH @@ -1780,7 +1835,7 @@ ENTRY(savectx) movl 4(%esp), %ecx movw _cpl, %ax movw %ax, PCB_IML(%ecx) - movl (%esp), %eax + movl (%esp), %eax movl %eax, PCB_EIP(%ecx) movl %ebx, PCB_EBX(%ecx) movl %esp, PCB_ESP(%ecx) @@ -1885,7 +1940,7 @@ L1: proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ - movl 12(%ebp),%ecx + movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret @@ -1903,7 +1958,7 @@ ENTRY(astoff) * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. - */ + */ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB @@ -2048,14 +2103,14 @@ bpttraps: testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) # non-kernel mode? jne calltrap # yes - call _kgdb_trap_glue + call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ - SUPERALIGN_TEXT + SUPERALIGN_TEXT IDTVEC(syscall) pushfl # only for stupid carry bit and more stupid wait3 cc kludge # XXX - also for direction flag (bzero, etc. clear it) diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 9372f6008c8f..6b9ef76bb87d 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -49,7 +49,7 @@ * 20 Apr 93 Bruce Evans New npx-0.5 code * 25 Apr 93 Bruce Evans New intr-0.1 code */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/i386/i386/machdep.c,v 1.3 1993/07/16 20:50:42 davidg Exp $"; +static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/machdep.c,v 1.4 1993/07/16 23:55:07 davidg Exp $"; #include @@ -849,16 +849,41 @@ init386(first) * Initialize the console before we print anything out. 
*/ - cninit (KERNBASE+0xa0000); + cninit (); /* make gdt memory segments */ - gdt_segs[GCODE_SEL].ssd_limit = btoc((int) &etext + NBPG); - for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, &gdt[x][0]); + gdt_segs[GCODE_SEL].ssd_limit = i386_btop(i386_round_page(&etext)) - 1; + /* + * XXX - VM_MAX_KERNEL_ADDRESS is correctly a max, but bogusly the + * address of the last page, not the last byte. Then above the end + * :-) there is another 4M of page tables or something. + */ +#define VM_END_KERNEL_ADDRESS (VM_MAX_KERNEL_ADDRESS + NBPG + NBPDR) + gdt_segs[GDATA_SEL].ssd_limit = i386_btop(VM_END_KERNEL_ADDRESS) - 1; + for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x); /* make ldt memory segments */ - ldt_segs[LUCODE_SEL].ssd_limit = btoc(UPT_MIN_ADDRESS); - ldt_segs[LUDATA_SEL].ssd_limit = btoc(UPT_MIN_ADDRESS); + /* + * The data segment limit must not cover the user area because we + * don't want the user area to be writable in copyout() etc. (page + * level protection is lost in kernel mode on 386's). Also, we + * don't want the user area to be writable directly (page level + * protection of the user area is not available on 486's with + * CR0_WP set, because there is no user-read/kernel-write mode). + * + * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it + * should be spelled ...MAX_USER... + */ +#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS + /* + * The code segment limit has to cover the user area until we move + * the signal trampoline out of the user area. This is safe because + * the code segment cannot be written to directly. + */ +#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) + ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; + ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ - for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, &ldt[x][0]); + for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x); /* exceptions */ setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL); @@ -951,7 +976,8 @@ init386(first) Maxmem = 640/4; else { Maxmem = pagesinext + 0x100000/NBPG; - first = 0x100000; /* skip hole */ + if (first < 0x100000) + first = 0x100000; /* skip hole */ } /* This used to explode, since Maxmem used to be 0 for bas CMOS*/ @@ -1077,6 +1103,7 @@ _remque(element) element->ph_rlink = (struct proc *)0; } +#ifdef SLOW_OLD_COPYSTRS vmunaccess() {} #if 0 /* assembler versions now in locore.s */ @@ -1124,6 +1151,8 @@ copyoutstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; return(ENAMETOOLONG); } +#endif /* SLOW_OLD_COPYSTRS */ + copystr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { u_int tally; diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 66c7fec59a8d..be0ce82e38cb 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -43,7 +43,7 @@ * * 28 Nov 1991 Poul-Henning Kamp Speedup processing. 
 */
-static char rcsid[] = "$Header: /usr/src/sys.386bsd/i386/i386/RCS/pmap.c,v 1.3 92/01/21 14:26:44 william Exp Locker: root $";
+static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/pmap.c,v 1.1.1.1 1993/06/12 14:58:06 rgrimes Exp $";
 
 /*
  * Derived from hp300 version by Mike Hibler, this version by William
@@ -461,7 +461,7 @@ pmap_pinit(pmap)
 
 	/* install self-referential address mapping entry */
 	*(int *)(pmap->pm_pdir+PTDPTDI) =
-		(int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_URKW;
+		(int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_KW;
 
 	pmap->pm_count = 1;
 	simple_lock_init(&pmap->pm_lock);
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 57195f32cb91..22fc6da31918 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -43,7 +43,7 @@
  *	08 Apr 93	Bruce Evans	Several VM system fixes
  *			Paul Kranenburg	Add counter for vmstat
  */
-static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2 92/01/21 14:22:13 william Exp $";
+static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.1.1.1 1993/06/12 14:58:05 rgrimes Exp $";
 
 /*
  * 386 Trap and System call handleing
@@ -70,6 +70,21 @@
 
 #include "machine/trap.h"
 
+#ifdef __GNUC__
+
+/*
+ * The "r" constraint could be "rm" except for fatal bugs in gas.  As usual,
+ * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
+ */
+#define	read_gs()	({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
+#define	write_gs(gs)	__asm("mov %0,%%gs" : : "r" ((u_short) gs))
+
+#else /* not __GNUC__ */
+
+u_short read_gs __P((void));
+void write_gs __P((/* promoted u_short */ int gs));
+
+#endif /* __GNUC__ */
 
 struct sysent sysent[];
 int nsysent;
@@ -112,9 +127,25 @@ trap(frame)
 		frame.tf_trapno, frame.tf_err, frame.tf_eip,
 		frame.tf_cs, rcr2(), frame.tf_esp);*/
 if(curpcb == 0 || curproc == 0) goto we_re_toast;
-	if (curpcb->pcb_onfault && frame.tf_trapno != 0xc) {
+	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
+		extern int _udatasel;
+
+		if (read_gs() != (u_short) _udatasel)
+			/*
+			 * Some user has corrupted %gs but we depend on it in
+			 * copyout() etc.  Fix it up and retry.
+			 *
+			 * (We don't preserve %fs or %gs, so users can change
+			 * them to either _ucodesel, _udatasel or a not-present
+			 * selector, possibly ORed with 0 to 3, making them
+			 * volatile for other users.  Not preserving them saves
+			 * time and doesn't lose functionality or open security
+			 * holes.)
+			 */
+			write_gs(_udatasel);
+		else
 copyfault:
-		frame.tf_eip = (int)curpcb->pcb_onfault;
+			frame.tf_eip = (int)curpcb->pcb_onfault;
 		return;
 	}
 
@@ -396,18 +427,49 @@
 }
 
 /*
- * Compensate for 386 brain damage (missing URKR)
+ * Compensate for 386 brain damage (missing URKR).
+ * This is a little simpler than the pagefault handler in trap() because
+ * the page tables have already been faulted in and high addresses
+ * are thrown out early for other reasons.
 */
-int trapwrite(unsigned addr) {
-	int rv;
+int trapwrite(addr)
+	unsigned addr;
+{
+	unsigned nss;
+	struct proc *p;
 	vm_offset_t va;
+	struct vmspace *vm;
 
 	va = trunc_page((vm_offset_t)addr);
-	if (va > VM_MAXUSER_ADDRESS) return(1);
-	rv = vm_fault(&curproc->p_vmspace->vm_map, va,
-		VM_PROT_READ | VM_PROT_WRITE, FALSE);
-	if (rv == KERN_SUCCESS) return(0);
-	else return(1);
+	/*
+	 * XXX - MAX is END.  Changed > to >= for temp. fix.
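+	 * (VM_MAXUSER_ADDRESS names the first address past the end of the
+	 * user range, so the old "va > VM_MAXUSER_ADDRESS" test wrongly
+	 * let va equal to it through; hence the change to >=.)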
+ */ + if (va >= VM_MAXUSER_ADDRESS) + return (1); + /* + * XXX: rude stack hack adapted from trap(). + */ + nss = 0; + p = curproc; + vm = p->p_vmspace; + if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { + nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ + - (unsigned)va)); + if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) + return (1); + } + + if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) + != KERN_SUCCESS) + return (1); + + /* + * XXX: continuation of rude stack hack + */ + if (nss > vm->vm_ssize) + vm->vm_ssize = nss; + + return (0); } /* diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index d675ed5a84a8..2b1a272ff99b 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -49,7 +49,7 @@ /* * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/i386/i386/vm_machdep.c,v 1.1.1.1 1993/06/12 14:58:05 rgrimes Exp $"; +static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/vm_machdep.c,v 1.2 1993/07/18 20:56:17 paul Exp $"; #include "param.h" #include "systm.h" @@ -105,8 +105,15 @@ cpu_fork(p1, p2) vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE); for (i=0; i < UPAGES; i++) pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG, - pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), VM_PROT_READ, 1); - + pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), + /* + * The user area has to be mapped writable because + * it contains the kernel stack (when CR0_WP is on + * on a 486 there is no user-read/kernel-write + * mode). It is protected from user mode access + * by the segment limits. + */ + VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb); /* diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h index 583d76c98506..9e09dd48c02f 100644 --- a/sys/amd64/include/cpu.h +++ b/sys/amd64/include/cpu.h @@ -53,10 +53,12 @@ * these are defined to get generic functions * rather than inline or machine-dependent implementations */ +#if 0 #define NEED_MINMAX /* need {,i,l,ul}{min,max} functions */ #define NEED_FFS /* need ffs function */ #define NEED_BCMP /* need bcmp function */ #define NEED_STRLEN /* need strlen function */ +#endif #define cpu_exec(p) /* nothing */ diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index e3b4a8c9c052..eb9a792f4732 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -58,6 +58,116 @@ outb(u_int port, u_char data) __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } +static __inline__ +imin(a, b) + int a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +imax(a, b) + int a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +unsigned int +min(a, b) + unsigned int a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +unsigned int +max(a, b) + unsigned int a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +long +lmin(a, b) + long a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +long +lmax(a, b) + long a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +unsigned long +ulmin(a, b) + unsigned long a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +unsigned long +ulmax(a, b) + unsigned long a, b; +{ + + return (a > b ? 
a : b); +} + +static __inline__ +ffs(mask) + register long mask; +{ + register int bit; + + if (!mask) + return(0); + for (bit = 1;; ++bit) { + if (mask&0x01) + return(bit); + mask >>= 1; + } +} + +static __inline__ +bcmp(v1, v2, len) + void *v1, *v2; + register unsigned len; +{ + register u_char *s1 = v1, *s2 = v2; + + while (len--) + if (*s1++ != *s2++) + return (1); + return (0); +} + +static __inline__ +size_t +strlen(s1) + register __const__ char *s1; +{ + register size_t len; + + for (len = 0; *s1++ != '\0'; len++) + ; + return (len); +} + #else /* not __GNUC__ */ int bdb __P((void)); @@ -80,3 +190,4 @@ really_void setidt __P((int idx, /*XXX*/caddr_t func, int typ, int dpl)); #undef really_u_int #undef really_void + diff --git a/sys/ddb/db_output.c b/sys/ddb/db_output.c index fc8fca776693..82c7ce3ef60b 100644 --- a/sys/ddb/db_output.c +++ b/sys/ddb/db_output.c @@ -34,6 +34,9 @@ /* * HISTORY * $Log: db_output.c,v $ + * Revision 1.1.1.1 1993/06/12 14:57:37 rgrimes + * Initial import, 0.1 + pk 0.2.4-B1 + * * Revision 1.1 1992/03/25 21:45:18 pace * Initial revision * @@ -60,6 +63,7 @@ */ #include "param.h" +#include "systm.h" #include /* diff --git a/sys/dev/ed/if_ed.c b/sys/dev/ed/if_ed.c index b07b752da892..4a693efa3756 100644 --- a/sys/dev/ed/if_ed.c +++ b/sys/dev/ed/if_ed.c @@ -17,6 +17,9 @@ * Modification history * * $Log: if_ed.c,v $ + * Revision 1.18 93/07/27 03:41:36 davidg + * removed unnecessary variable assignment in ed_reset() + * * Revision 1.17 93/07/26 18:40:57 davidg * Added include of systm.h to pick up inlined min/max/bcmp if you have * them in cpufunc.h. Modified wait loop in reset to look a little better. @@ -798,7 +801,7 @@ ed_reset(unit) ed_stop(unit); ed_init(unit); - s = splx(s); + (void) splx(s); } /* diff --git a/sys/dev/speaker/spkr.c b/sys/dev/speaker/spkr.c index ffeec08fe5dd..04feb2f88493 100644 --- a/sys/dev/speaker/spkr.c +++ b/sys/dev/speaker/spkr.c @@ -12,6 +12,7 @@ #if NSPEAKER > 0 #include "param.h" +#include "systm.h" #include "kernel.h" #include "errno.h" #include "buf.h" diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index ef92a8c9c62c..74235a2f8bdb 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -89,9 +89,35 @@ #define ALIGN_TEXT .align 2,0x90 /* 4-byte boundaries, NOP-filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte boundaries better for 486 */ -#define LENTRY(name) ALIGN_TEXT; .globl name; name: -#define ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: -#define ALTENTRY(name) .globl _/**/name; _/**/name: +#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) + +#ifdef GPROF +/* + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump + * over the mcounting. + */ +#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f +#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: +/* + * The call to mcount supports the usual (bad) conventions. We allocate + * some data and pass a pointer to it although the 386BSD doesn't use + * the data. We set up a frame before calling mcount because that is + * the standard convention although it makes work for both mcount and + * callers. + */ +#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ + pushl %ebp; movl %esp, %ebp; \ + movl $1b,%eax; call mcount; popl %ebp +#else +/* + * ALTENTRY() has to align because it is before a corresponding ENTRY(). + * ENTRY() has to align to because there may be no ALTENTRY() before it. + * If there is a previous ALTENTRY() then the alignment code is empty. 
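+ * (With GPROF off an extra entry point therefore costs only its label,
+ * the alignment in front of it being empty as noted above.)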
+ */ +#define ALTENTRY(name) GEN_ENTRY(_/**/name) +#define ENTRY(name) GEN_ENTRY(_/**/name) +#endif /* NB: NOP now preserves registers so NOPs can be inserted anywhere */ /* XXX: NOP and FASTER_NOP are misleadingly named */ @@ -143,10 +169,11 @@ .globl _boothowto, _bootdev, _curpcb .globl __ucodesel,__udatasel - .globl _cpu, _cold, _atdevbase, _atdevphys + .globl _cpu, _cold, _atdevbase _cpu: .long 0 # are we 386, 386sx, or 486 _cold: .long 1 # cold till we are not _atdevbase: .long 0 # location of start of iomem in virtual + # .nonglobl _atdevphys (should be register or something) _atdevphys: .long 0 # location of device mapping ptes (phys) .globl _IdlePTD, _KPTphys @@ -158,7 +185,8 @@ _cyloffset: .long 0 _proc0paddr: .long 0 #ifdef SHOW_A_LOT -bit_colors: .byte GREEN,RED,0,0 +bit_colors: + .byte GREEN,RED,0,0 #endif .space 512 @@ -171,11 +199,11 @@ tmpstk: /*****************************************************************************/ /* - * start: jumped to directly from the boot blocks + * btext: beginning of text section. + * Also the entry point (jumped to directly from the boot blocks). */ -LENTRY(start) - movw $0x1234,%ax - movw %ax,0x472 # warm boot +ENTRY(btext) + movw $0x1234, 0x472 # warm boot jmp 1f .space 0x500 # skip over warm boot shit @@ -278,7 +306,10 @@ LENTRY(start) movl %esi,%ecx # this much memory, shrl $ PGSHIFT,%ecx # for this many pte s addl $ UPAGES+4,%ecx # including our early context - movl $0xa0,%ecx # XXX - cover debugger pages + cmpl $0xa0,%ecx # XXX - cover debugger pages + jae 1f + movl $0xa0,%ecx +1: movl $PG_V|PG_KW,%eax # having these bits set, lea (4*NBPG)(%esi),%ebx # physical address of KPT in proc 0, movl %ebx,_KPTphys-SYSTEM # in the kernel page table, @@ -372,9 +403,15 @@ LENTRY(start) /* load base of page directory, and enable mapping */ movl %esi,%eax # phys address of ptd in proc 0 - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 # load ptd addr into mmu movl %cr0,%eax # get control word +/* + * XXX it is now safe to always (attempt to) set CR0_WP and to set up + * the page tables assuming it works, so USE_486_WRITE_PROTECT will go + * away. The special 386 PTE checking needs to be conditional on + * whatever distingiushes 486-only kernels from 386-486 kernels. + */ #ifdef USE_486_WRITE_PROTECT orl $CR0_PE|CR0_PG|CR0_WP,%eax # and let s page! #else @@ -407,7 +444,7 @@ begin: /* now running relocated at SYSTEM where the system is linked to run */ lea 7*NBPG(%esi),%esi # skip past stack. pushl %esi - + /* relocate debugger gdt entries */ movl $_gdt+8*9,%eax # adjust slots 9-17 @@ -423,9 +460,9 @@ reloc_gdt: 1: call _init386 # wire 386 chip for unix operation - + movl $0,_PTD - call _main # autoconfiguration, mountroot etc + call _main # autoconfiguration, mountroot etc popl %esi /* @@ -459,9 +496,11 @@ lretmsg1: /* * Icode is copied out to process 1 and executed in user mode: * execve("/sbin/init", argv, envp); exit(0); - * If the exec fails, process 1 exits and the system panics. + * If the execve fails, process 1 exits and the system panics. 
*/ -ENTRY(icode) +NON_GPROF_ENTRY(icode) + pushl $0 # envp + # pushl $argv-_icode # gas fucks up again movl $argv,%eax subl $_icode,%eax @@ -471,14 +510,17 @@ ENTRY(icode) movl $init,%eax subl $_icode,%eax pushl %eax - pushl %eax # dummy out rta - movl %esp,%ebp + pushl %eax # junk to fake return address + movl $exec,%eax LCALL(0x7,0x0) - pushl %eax + + pushl %eax # execve failed, the errno will do for an + # exit code because errnos are < 128 + pushl %eax # junk to fake return address + movl $exit,%eax - pushl %eax # dummy out rta LCALL(0x7,0x0) init: @@ -494,7 +536,7 @@ eicode: _szicode: .long _szicode-_icode -ENTRY(sigcode) +NON_GPROF_ENTRY(sigcode) call 12(%esp) lea 28(%esp),%eax # scp (the call may have clobbered the # copy at 8(%esp)) @@ -640,7 +682,7 @@ ENTRY(bzero) # void bzero(void *base, u_int cnt) movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax - shrl $2,%ecx + shrl $2,%ecx cld rep stosl @@ -664,6 +706,7 @@ ENTRY(fillw) # fillw (pat,base,cnt) ret ENTRY(bcopyb) +bcopyb: pushl %esi pushl %edi movl 12(%esp),%esi @@ -672,7 +715,7 @@ ENTRY(bcopyb) cmpl %esi,%edi /* potentially overlapping? */ jnb 1f cld /* nope, copy forwards */ - rep + rep movsb popl %edi popl %esi @@ -693,6 +736,7 @@ ENTRY(bcopyb) ret ENTRY(bcopyw) +bcopyw: pushl %esi pushl %edi movl 12(%esp),%esi @@ -735,20 +779,18 @@ ENTRY(bcopyw) ENTRY(bcopyx) movl 16(%esp),%eax cmpl $2,%eax - je _bcopyw + je bcopyw /* not _bcopyw, to avoid multiple mcounts */ cmpl $4,%eax - jne _bcopyb - /* - * Fall through to bcopy. ENTRY() provides harmless fill bytes. - */ - + je bcopy + jmp bcopyb /* * (ov)bcopy (src,dst,cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ -ENTRY(bcopy) ALTENTRY(ovbcopy) +ENTRY(bcopy) +bcopy: pushl %esi pushl %edi movl 12(%esp),%esi @@ -789,8 +831,8 @@ ALTENTRY(ovbcopy) cld ret -ENTRY(ntohl) -ALTENTRY(htonl) +ALTENTRY(ntohl) +ENTRY(htonl) movl 4(%esp),%eax #ifdef i486 /* XXX */ @@ -798,7 +840,7 @@ ALTENTRY(htonl) * equivalent bytes. This can be changed back to bswap when we * upgrade to a newer version of Gas */ /* bswap %eax */ - .byte 0x0f + .byte 0x0f .byte 0xc8 #else xchgb %al,%ah @@ -807,8 +849,8 @@ ALTENTRY(htonl) #endif ret -ENTRY(ntohs) -ALTENTRY(htons) +ALTENTRY(ntohs) +ENTRY(htons) movzwl 4(%esp),%eax xchgb %al,%ah ret @@ -861,10 +903,6 @@ show_bits: * protection violation occurs inside the functions, the trap handler * returns to *curpcb->onfault instead of the function. */ -/* - * XXX These routines load a segment register every time they execute. - * it would be nicer (faster) if they could depend on %gs. - */ ENTRY(copyout) # copyout (from_kernel, to_user, len) @@ -879,15 +917,36 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) orl %ebx, %ebx # nothing to do? jz done_copyout -#ifdef USE_486_WRITE_PROTECT - /* if WP bit in CR0 is set (n/a on 386), the hardware does the */ - /* write check. We just have to load the right segment selector */ - pushl %es - movl __udatasel, %eax - movl %ax, %es -#else /* USE_486_WRITE_PROTECT */ - /* we have to check each PTE for (write) permission */ + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + * + * Otherwise, it saves having to load and restore %es to get the + * usual segment-based protection (the destination segment for movs + * is always %es). The other explicit checks for user-writablility + * are not quite sufficient. 
They fail for the user area because + * we mapped the user area read/write to avoid having an #ifdef in + * vm_machdep.c. They fail for user PTEs and/or PTDs! (107 + * addresses including 0xff800000 and 0xfc000000). I'm not sure if + * this can be fixed. Marking the PTEs supervisor mode and the + * PDE's user mode would almost work, but there may be a problem + * with the self-referential PDE. + */ + movl %edi, %eax + addl %ebx, %eax + jc copyout_fault +#define VM_END_USER_ADDRESS 0xFDBFE000 /* XXX */ + cmpl $VM_END_USER_ADDRESS, %eax + ja copyout_fault +#ifndef USE_486_WRITE_PROTECT + /* + * We have to check each PTE for user write permission. + * The checking may cause a page fault, so it is important to set + * up everything for return via copyout_fault before here. + */ /* compute number of pages */ movl %edi, %ecx andl $0x0fff, %ecx @@ -906,7 +965,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $0x07, %al /* Pages must be VALID + USERACC + WRITABLE */ cmpb $0x07, %al je 2f - + /* simulate a trap */ pushl %edx pushl %ecx @@ -924,8 +983,7 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) addl $4, %edx decl %ecx jnz 1b /* check next page */ - -#endif /* USE_486_WRITE_PROTECT */ +#endif /* ndef USE_486_WRITE_PROTECT */ /* now copy it over */ /* bcopy (%esi, %edi, %ebx) */ @@ -938,9 +996,6 @@ ENTRY(copyout) # copyout (from_kernel, to_user, len) andb $3, %cl rep movsb -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif done_copyout: popl %ebx @@ -951,10 +1006,8 @@ done_copyout: movl %eax,PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyout_fault: -#ifdef USE_486_WRITE_PROTECT - popl %es -#endif popl %ebx popl %edi popl %esi @@ -972,22 +1025,19 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl 12(%esp),%esi # caddr_t from movl 16(%esp),%edi # caddr_t to movl 20(%esp),%ecx # size_t len - movl %ecx,%edx - pushl %ds - movl __udatasel,%ax # access 'from' via user data segment - movl %ax,%ds movb %cl,%al shrl $2,%ecx # copy longword-wise cld + gs rep movsl movb %al,%cl andb $3,%cl # copy remaining bytes + gs rep movsb - popl %ds popl %edi popl %esi xorl %eax, %eax @@ -995,8 +1045,8 @@ ENTRY(copyin) # copyin (from_user, to_kernel, len) movl %eax, PCB_ONFAULT(%edx) ret + ALIGN_TEXT copyin_fault: - popl %ds popl %edi popl %esi movl _curpcb, %edx @@ -1007,10 +1057,8 @@ copyin_fault: /* * fu{byte,sword,word} : fetch a byte (sword, word) from user memory */ -ENTRY(fuword) ALTENTRY(fuiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fuword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1018,10 +1066,8 @@ ALTENTRY(fuiword) movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + ENTRY(fusword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1029,11 +1075,9 @@ ENTRY(fusword) movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - -ENTRY(fubyte) + ALTENTRY(fuibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(fubyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx @@ -1041,7 +1085,8 @@ ALTENTRY(fuibyte) movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret - + + ALIGN_TEXT fusufault: movl _curpcb,%ecx xorl %eax,%eax @@ -1056,42 +1101,39 @@ fusufault: /* * we only have to set the right segment selector. 
*/ -ENTRY(suword) ALTENTRY(suiword) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(suword) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movl 8(%esp),%eax gs movl %eax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - + ENTRY(susword) - movl __udatasel,%eax - movl %ax,%gs movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movw 8(%esp),%ax gs movw %ax,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret - -ENTRY(subyte) + ALTENTRY(suibyte) - movl __udatasel,%eax - movl %ax,%gs +ENTRY(subyte) movl _curpcb,%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx movb 8(%esp),%al gs movb %al,(%edx) - movl $0,PCB_ONFAULT(%ecx) + xorl %eax,%eax + movl %eax,PCB_ONFAULT(%ecx) ret @@ -1102,8 +1144,8 @@ ALTENTRY(suibyte) */ # XXX - page boundary crossing is not handled yet +ALTENTRY(suibyte) ENTRY(subyte) -ENTRY(suibyte) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1123,6 +1165,7 @@ ENTRY(suibyte) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movb %al, (%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1149,14 +1192,15 @@ ENTRY(susword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movw %ax, (%edx) xorl %eax, %eax movl _curpcb, %ecx movl %eax, PCB_ONFAULT(%ecx) ret +ALTENTRY(suiword) ENTRY(suword) -ENTRY(suiword) movl _curpcb, %ecx movl $fusufault, PCB_ONFAULT(%ecx) movl 4(%esp), %edx @@ -1176,6 +1220,7 @@ ENTRY(suiword) 1: movl 4(%esp), %edx movl 8(%esp), %eax + gs movl %eax, 0(%edx) xorl %eax, %eax movl _curpcb, %ecx @@ -1183,6 +1228,7 @@ ENTRY(suiword) ret #endif /* USE_486_WRITE_PROTECT */ + /* * copyoutstr(from, to, maxlen, int *lencopied) * copy a string from from to to, stop when a 0 character is reached. @@ -1201,13 +1247,19 @@ ENTRY(copyoutstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: decl %edx jz 4f + /* + * gs override doesn't work for stosb. Use the same explicit check + * as in copyout(). It's much slower now because it is per-char. + * XXX - however, it would be faster to rewrite this function to use + * strlen() and copyout(). + */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt lodsb gs stosb @@ -1222,7 +1274,7 @@ ENTRY(copyoutstr) movl $ENAMETOOLONG, %eax jmp 6f -#else /* USE_486_WRITE_PROTECT */ +#else /* ndef USE_486_WRITE_PROTECT */ ENTRY(copyoutstr) pushl %esi @@ -1234,6 +1286,13 @@ ENTRY(copyoutstr) movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen 1: + /* + * It suffices to check that the first byte is in user space, because + * we look at a page at a time and the end address is on a page + * boundary. 
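+	 * As a C sketch (illustration only, not the exact code), the
+	 * per-page test added here amounts to:
+	 *
+	 *	if ((unsigned) to >= VM_END_USER_ADDRESS)
+	 *		return (EFAULT);
+	 *
+	 * applied to the first byte of each page before that page's PTE
+	 * is examined.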
+ */ + cmpl $VM_END_USER_ADDRESS, %edi + jae cpystrflt movl %edi, %eax shrl $IDXSHIFT, %eax andb $0xfc, %al @@ -1280,6 +1339,7 @@ ENTRY(copyoutstr) /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG, %eax jmp 6f + #endif /* USE_486_WRITE_PROTECT */ /* @@ -1298,8 +1358,6 @@ ENTRY(copyinstr) movl 12(%esp), %esi # %esi = from movl 16(%esp), %edi # %edi = to movl 20(%esp), %edx # %edx = maxlen - movl __udatasel,%eax - movl %ax,%gs incl %edx 1: @@ -1447,13 +1505,12 @@ ENTRY(ssdtosd) # ssdtosd(*ssdp,*sdp) ENTRY(tlbflush) # tlbflush() movl %cr3,%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret -ENTRY(lcr0) # lcr0(cr0) -ALTENTRY(load_cr0) +ENTRY(load_cr0) # load_cr0(cr0) movl 4(%esp),%eax movl %eax,%cr0 ret @@ -1470,18 +1527,13 @@ ENTRY(rcr2) # rcr2() ENTRY(rcr3) # rcr3() -ALTENTRY(_cr3) movl %cr3,%eax ret - /* - * void lcr3(caddr_t cr3) - */ -ENTRY(lcr3) -ALTENTRY(load_cr3) +ENTRY(load_cr3) # void load_cr3(caddr_t cr3) movl 4(%esp),%eax - orl $ I386_CR3PAT,%eax + orl $ I386_CR3PAT,%eax movl %eax,%cr3 ret @@ -1600,17 +1652,19 @@ sw0: .asciz "swtch" * When no processes are on the runq, Swtch branches to idle * to wait for something to come ready. */ -LENTRY(Idle) + ALIGN_TEXT +Idle: sti SHOW_STI + + ALIGN_TEXT idle_loop: call _spl0 cmpl $0,_whichqs jne sw1 - hlt # wait for interrupt + hlt # wait for interrupt jmp idle_loop - SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ badsw: pushl $sw0 call _panic @@ -1619,6 +1673,7 @@ badsw: /* * Swtch() */ + SUPERALIGN_TEXT /* so profiling doesn't lump Idle with swtch().. */ ENTRY(swtch) incl _cnt+V_SWTCH @@ -1780,7 +1835,7 @@ ENTRY(savectx) movl 4(%esp), %ecx movw _cpl, %ax movw %ax, PCB_IML(%ecx) - movl (%esp), %eax + movl (%esp), %eax movl %eax, PCB_EIP(%ecx) movl %ebx, PCB_EBX(%ecx) movl %esp, PCB_ESP(%ecx) @@ -1885,7 +1940,7 @@ L1: proffault: /* if we get a fault, then kill profiling all together */ movl $0,PCB_ONFAULT(%edx) /* squish the fault handler */ - movl 12(%ebp),%ecx + movl 12(%ebp),%ecx movl $0,PR_SCALE(%ecx) /* up->pr_scale = 0 */ leave ret @@ -1903,7 +1958,7 @@ ENTRY(astoff) * * XXX - debugger traps are now interrupt gates so at least bdb doesn't lose * control. The sti's give the standard losing behaviour for ddb and kgdb. - */ + */ #define IDTVEC(name) ALIGN_TEXT; .globl _X/**/name; _X/**/name: #define TRAP(a) pushl $(a) ; jmp alltraps #ifdef KGDB @@ -2048,14 +2103,14 @@ bpttraps: testb $SEL_RPL_MASK,TRAPF_CS_OFF(%esp) # non-kernel mode? jne calltrap # yes - call _kgdb_trap_glue + call _kgdb_trap_glue jmp calltrap #endif /* * Call gate entry for syscall */ - SUPERALIGN_TEXT + SUPERALIGN_TEXT IDTVEC(syscall) pushfl # only for stupid carry bit and more stupid wait3 cc kludge # XXX - also for direction flag (bzero, etc. clear it) diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 9372f6008c8f..6b9ef76bb87d 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -49,7 +49,7 @@ * 20 Apr 93 Bruce Evans New npx-0.5 code * 25 Apr 93 Bruce Evans New intr-0.1 code */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/i386/i386/machdep.c,v 1.3 1993/07/16 20:50:42 davidg Exp $"; +static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/machdep.c,v 1.4 1993/07/16 23:55:07 davidg Exp $"; #include @@ -849,16 +849,41 @@ init386(first) * Initialize the console before we print anything out. 
*/ - cninit (KERNBASE+0xa0000); + cninit (); /* make gdt memory segments */ - gdt_segs[GCODE_SEL].ssd_limit = btoc((int) &etext + NBPG); - for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, &gdt[x][0]); + gdt_segs[GCODE_SEL].ssd_limit = i386_btop(i386_round_page(&etext)) - 1; + /* + * XXX - VM_MAX_KERNEL_ADDRESS is correctly a max, but bogusly the + * address of the last page, not the last byte. Then above the end + * :-) there is another 4M of page tables or something. + */ +#define VM_END_KERNEL_ADDRESS (VM_MAX_KERNEL_ADDRESS + NBPG + NBPDR) + gdt_segs[GDATA_SEL].ssd_limit = i386_btop(VM_END_KERNEL_ADDRESS) - 1; + for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x); /* make ldt memory segments */ - ldt_segs[LUCODE_SEL].ssd_limit = btoc(UPT_MIN_ADDRESS); - ldt_segs[LUDATA_SEL].ssd_limit = btoc(UPT_MIN_ADDRESS); + /* + * The data segment limit must not cover the user area because we + * don't want the user area to be writable in copyout() etc. (page + * level protection is lost in kernel mode on 386's). Also, we + * don't want the user area to be writable directly (page level + * protection of the user area is not available on 486's with + * CR0_WP set, because there is no user-read/kernel-write mode). + * + * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it + * should be spelled ...MAX_USER... + */ +#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS + /* + * The code segment limit has to cover the user area until we move + * the signal trampoline out of the user area. This is safe because + * the code segment cannot be written to directly. + */ +#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) + ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; + ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; /* Note. eventually want private ldts per process */ - for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, &ldt[x][0]); + for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x); /* exceptions */ setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL); @@ -951,7 +976,8 @@ init386(first) Maxmem = 640/4; else { Maxmem = pagesinext + 0x100000/NBPG; - first = 0x100000; /* skip hole */ + if (first < 0x100000) + first = 0x100000; /* skip hole */ } /* This used to explode, since Maxmem used to be 0 for bas CMOS*/ @@ -1077,6 +1103,7 @@ _remque(element) element->ph_rlink = (struct proc *)0; } +#ifdef SLOW_OLD_COPYSTRS vmunaccess() {} #if 0 /* assembler versions now in locore.s */ @@ -1124,6 +1151,8 @@ copyoutstr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; return(ENAMETOOLONG); } +#endif /* SLOW_OLD_COPYSTRS */ + copystr(fromaddr, toaddr, maxlength, lencopied) u_int *lencopied, maxlength; void *fromaddr, *toaddr; { u_int tally; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 66c7fec59a8d..be0ce82e38cb 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -43,7 +43,7 @@ * * 28 Nov 1991 Poul-Henning Kamp Speedup processing. 
 */
-static char rcsid[] = "$Header: /usr/src/sys.386bsd/i386/i386/RCS/pmap.c,v 1.3 92/01/21 14:26:44 william Exp Locker: root $";
+static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/pmap.c,v 1.1.1.1 1993/06/12 14:58:06 rgrimes Exp $";
 
 /*
  * Derived from hp300 version by Mike Hibler, this version by William
@@ -461,7 +461,7 @@ pmap_pinit(pmap)
 
 	/* install self-referential address mapping entry */
 	*(int *)(pmap->pm_pdir+PTDPTDI) =
-		(int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_URKW;
+		(int)pmap_extract(kernel_pmap, pmap->pm_pdir) | PG_V | PG_KW;
 
 	pmap->pm_count = 1;
 	simple_lock_init(&pmap->pm_lock);
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 57195f32cb91..22fc6da31918 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -43,7 +43,7 @@
  * 08 Apr 93 Bruce Evans Several VM system fixes
  * Paul Kranenburg Add counter for vmstat
  */
-static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2 92/01/21 14:22:13 william Exp $";
+static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.1.1.1 1993/06/12 14:58:05 rgrimes Exp $";
 
 /*
  * 386 Trap and System call handleing
@@ -70,6 +70,21 @@ static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2
 
 #include "machine/trap.h"
 
+#ifdef __GNUC__
+
+/*
+ * The "r" constraint could be "rm" except for fatal bugs in gas. As usual,
+ * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
+ */
+#define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
+#define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs))
+
+#else /* not __GNUC__ */
+
+u_short read_gs __P((void));
+void write_gs __P((/* promoted u_short */ int gs));
+
+#endif /* __GNUC__ */
 
 struct sysent sysent[];
 int nsysent;
@@ -112,9 +127,25 @@ trap(frame)
 		frame.tf_trapno, frame.tf_err, frame.tf_eip,
 		frame.tf_cs, rcr2(), frame.tf_esp);*/
 if(curpcb == 0 || curproc == 0) goto we_re_toast;
-	if (curpcb->pcb_onfault && frame.tf_trapno != 0xc) {
+	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
+		extern int _udatasel;
+
+		if (read_gs() != (u_short) _udatasel)
+			/*
+			 * Some user has corrupted %gs but we depend on it in
+			 * copyout() etc. Fix it up and retry.
+			 *
+			 * (We don't preserve %fs or %gs, so users can change
+			 * them to either _ucodesel, _udatasel or a not-present
+			 * selector, possibly ORed with 0 to 3, making them
+			 * volatile for other users. Not preserving them saves
+			 * time and doesn't lose functionality or open security
+			 * holes.)
+			 */
+			write_gs(_udatasel);
+		else
 copyfault:
-		frame.tf_eip = (int)curpcb->pcb_onfault;
+			frame.tf_eip = (int)curpcb->pcb_onfault;
 		return;
 	}
 
@@ -396,18 +427,49 @@ if(curpcb == 0 || curproc == 0) goto we_re_toast;
 }
 
 /*
- * Compensate for 386 brain damage (missing URKR)
+ * Compensate for 386 brain damage (missing URKR).
+ * This is a little simpler than the pagefault handler in trap() because
+ * the page tables have already been faulted in and high addresses
+ * are thrown out early for other reasons.
  */
-int trapwrite(unsigned addr) {
-	int rv;
+int trapwrite(addr)
+	unsigned addr;
+{
+	unsigned nss;
+	struct proc *p;
 	vm_offset_t va;
+	struct vmspace *vm;
 
 	va = trunc_page((vm_offset_t)addr);
-	if (va > VM_MAXUSER_ADDRESS) return(1);
-	rv = vm_fault(&curproc->p_vmspace->vm_map, va,
-		VM_PROT_READ | VM_PROT_WRITE, FALSE);
-	if (rv == KERN_SUCCESS) return(0);
-	else return(1);
+	/*
+	 * XXX - MAX is END. Changed > to >= for temp. fix.
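+	 * That is, VM_MAXUSER_ADDRESS is one past the last user page, so
+	 * the last value of va that may be accepted (illustration) is
+	 *
+	 *	VM_MAXUSER_ADDRESS - NBPG
+	 *
+	 * and the old "va > VM_MAXUSER_ADDRESS" test wrongly accepted
+	 * va == VM_MAXUSER_ADDRESS, one page past the end.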
+ */ + if (va >= VM_MAXUSER_ADDRESS) + return (1); + /* + * XXX: rude stack hack adapted from trap(). + */ + nss = 0; + p = curproc; + vm = p->p_vmspace; + if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) { + nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ + - (unsigned)va)); + if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) + return (1); + } + + if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE) + != KERN_SUCCESS) + return (1); + + /* + * XXX: continuation of rude stack hack + */ + if (nss > vm->vm_ssize) + vm->vm_ssize = nss; + + return (0); } /* diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index d675ed5a84a8..2b1a272ff99b 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -49,7 +49,7 @@ /* * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ -static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/i386/i386/vm_machdep.c,v 1.1.1.1 1993/06/12 14:58:05 rgrimes Exp $"; +static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/vm_machdep.c,v 1.2 1993/07/18 20:56:17 paul Exp $"; #include "param.h" #include "systm.h" @@ -105,8 +105,15 @@ cpu_fork(p1, p2) vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE); for (i=0; i < UPAGES; i++) pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG, - pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), VM_PROT_READ, 1); - + pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), + /* + * The user area has to be mapped writable because + * it contains the kernel stack (when CR0_WP is on + * on a 486 there is no user-read/kernel-write + * mode). It is protected from user mode access + * by the segment limits. + */ + VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb); /* diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index 583d76c98506..9e09dd48c02f 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -53,10 +53,12 @@ * these are defined to get generic functions * rather than inline or machine-dependent implementations */ +#if 0 #define NEED_MINMAX /* need {,i,l,ul}{min,max} functions */ #define NEED_FFS /* need ffs function */ #define NEED_BCMP /* need bcmp function */ #define NEED_STRLEN /* need strlen function */ +#endif #define cpu_exec(p) /* nothing */ diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index e3b4a8c9c052..eb9a792f4732 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -58,6 +58,116 @@ outb(u_int port, u_char data) __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } +static __inline__ +imin(a, b) + int a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +imax(a, b) + int a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +unsigned int +min(a, b) + unsigned int a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +unsigned int +max(a, b) + unsigned int a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +long +lmin(a, b) + long a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +long +lmax(a, b) + long a, b; +{ + + return (a > b ? a : b); +} + +static __inline__ +unsigned long +ulmin(a, b) + unsigned long a, b; +{ + + return (a < b ? a : b); +} + +static __inline__ +unsigned long +ulmax(a, b) + unsigned long a, b; +{ + + return (a > b ? 
a : b);
+}
+
+static __inline__
+ffs(mask)
+	register long mask;
+{
+	register int bit;
+
+	if (!mask)
+		return(0);
+	for (bit = 1;; ++bit) {
+		if (mask&0x01)
+			return(bit);
+		mask >>= 1;
+	}
+}
+
+static __inline__
+bcmp(v1, v2, len)
+	void *v1, *v2;
+	register unsigned len;
+{
+	register u_char *s1 = v1, *s2 = v2;
+
+	while (len--)
+		if (*s1++ != *s2++)
+			return (1);
+	return (0);
+}
+
+static __inline__
+size_t
+strlen(s1)
+	register __const__ char *s1;
+{
+	register size_t len;
+
+	for (len = 0; *s1++ != '\0'; len++)
+		;
+	return (len);
+}
+
 #else /* not __GNUC__ */
 
 int bdb __P((void));
@@ -80,3 +190,4 @@ really_void setidt __P((int idx, /*XXX*/caddr_t func, int typ, int dpl));
 
 #undef really_u_int
 #undef really_void
+
diff --git a/sys/i386/isa/if_ed.c b/sys/i386/isa/if_ed.c
index b07b752da892..4a693efa3756 100644
--- a/sys/i386/isa/if_ed.c
+++ b/sys/i386/isa/if_ed.c
@@ -17,6 +17,9 @@
  * Modification history
  *
  * $Log: if_ed.c,v $
+ * Revision 1.18 93/07/27 03:41:36 davidg
+ * removed unnecessary variable assignment in ed_reset()
+ *
  * Revision 1.17 93/07/26 18:40:57 davidg
  * Added include of systm.h to pick up inlined min/max/bcmp if you have
  * them in cpufunc.h. Modified wait loop in reset to look a little better.
@@ -798,7 +801,7 @@ ed_reset(unit)
 
 	ed_stop(unit);
 	ed_init(unit);
-	s = splx(s);
+	(void) splx(s);
 }
 
 /*
diff --git a/sys/i386/isa/if_is.c b/sys/i386/isa/if_is.c
index 8485649e6d88..ffc4940499b1 100644
--- a/sys/i386/isa/if_is.c
+++ b/sys/i386/isa/if_is.c
@@ -27,6 +27,7 @@
 #include "bpfilter.h"
 
 #include "param.h"
+#include "systm.h"
 #include "errno.h"
 #include "ioctl.h"
 #include "mbuf.h"
@@ -171,7 +172,7 @@ int is_reset(int unit)
 	s = splnet();
 	printf("is%d: reset\n", unit);
 	is_init(unit);
-	s = splx();
+	(void) splx(s);
 }
 
 /*
diff --git a/sys/i386/isa/spkr.c b/sys/i386/isa/spkr.c
index ffeec08fe5dd..04feb2f88493 100644
--- a/sys/i386/isa/spkr.c
+++ b/sys/i386/isa/spkr.c
@@ -12,6 +12,7 @@
 #if NSPEAKER > 0
 
 #include "param.h"
+#include "systm.h"
 #include "kernel.h"
 #include "errno.h"
 #include "buf.h"
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 57195f32cb91..22fc6da31918 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -43,7 +43,7 @@
  * 08 Apr 93 Bruce Evans Several VM system fixes
  * Paul Kranenburg Add counter for vmstat
  */
-static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2 92/01/21 14:22:13 william Exp $";
+static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys/i386/i386/trap.c,v 1.1.1.1 1993/06/12 14:58:05 rgrimes Exp $";
 
 /*
  * 386 Trap and System call handleing
@@ -70,6 +70,21 @@ static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2
 
 #include "machine/trap.h"
 
+#ifdef __GNUC__
+
+/*
+ * The "r" constraint could be "rm" except for fatal bugs in gas. As usual,
+ * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
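+ *
+ * For illustration (a sketch, not part of this change):
+ *
+ *	u_short sel = read_gs();	emits "mov %gs,reg"
+ *	write_gs(sel);			emits "mov reg,%gs"
+ *
+ * with no size suffix on either mov, as noted above.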
+ */
+#define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
+#define write_gs(gs) __asm("mov %0,%%gs" : : "r" ((u_short) gs))
+
+#else /* not __GNUC__ */
+
+u_short read_gs __P((void));
+void write_gs __P((/* promoted u_short */ int gs));
+
+#endif /* __GNUC__ */
 
 struct sysent sysent[];
 int nsysent;
@@ -112,9 +127,25 @@ trap(frame)
 		frame.tf_trapno, frame.tf_err, frame.tf_eip,
 		frame.tf_cs, rcr2(), frame.tf_esp);*/
 if(curpcb == 0 || curproc == 0) goto we_re_toast;
-	if (curpcb->pcb_onfault && frame.tf_trapno != 0xc) {
+	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
+		extern int _udatasel;
+
+		if (read_gs() != (u_short) _udatasel)
+			/*
+			 * Some user has corrupted %gs but we depend on it in
+			 * copyout() etc. Fix it up and retry.
+			 *
+			 * (We don't preserve %fs or %gs, so users can change
+			 * them to either _ucodesel, _udatasel or a not-present
+			 * selector, possibly ORed with 0 to 3, making them
+			 * volatile for other users. Not preserving them saves
+			 * time and doesn't lose functionality or open security
+			 * holes.)
+			 */
+			write_gs(_udatasel);
+		else
 copyfault:
-		frame.tf_eip = (int)curpcb->pcb_onfault;
+			frame.tf_eip = (int)curpcb->pcb_onfault;
 		return;
 	}
 
@@ -396,18 +427,49 @@ if(curpcb == 0 || curproc == 0) goto we_re_toast;
 }
 
 /*
- * Compensate for 386 brain damage (missing URKR)
+ * Compensate for 386 brain damage (missing URKR).
+ * This is a little simpler than the pagefault handler in trap() because
+ * the page tables have already been faulted in and high addresses
+ * are thrown out early for other reasons.
  */
-int trapwrite(unsigned addr) {
-	int rv;
+int trapwrite(addr)
+	unsigned addr;
+{
+	unsigned nss;
+	struct proc *p;
 	vm_offset_t va;
+	struct vmspace *vm;
 
 	va = trunc_page((vm_offset_t)addr);
-	if (va > VM_MAXUSER_ADDRESS) return(1);
-	rv = vm_fault(&curproc->p_vmspace->vm_map, va,
-		VM_PROT_READ | VM_PROT_WRITE, FALSE);
-	if (rv == KERN_SUCCESS) return(0);
-	else return(1);
+	/*
+	 * XXX - MAX is END. Changed > to >= for temp. fix.
+	 */
+	if (va >= VM_MAXUSER_ADDRESS)
+		return (1);
+	/*
+	 * XXX: rude stack hack adapted from trap().
+	 */
+	nss = 0;
+	p = curproc;
+	vm = p->p_vmspace;
+	if ((caddr_t)va >= vm->vm_maxsaddr && dostacklimits) {
+		nss = clrnd(btoc((unsigned)vm->vm_maxsaddr + MAXSSIZ
+			- (unsigned)va));
+		if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
+			return (1);
+	}
+
+	if (vm_fault(&vm->vm_map, va, VM_PROT_READ | VM_PROT_WRITE, FALSE)
+	    != KERN_SUCCESS)
+		return (1);
+
+	/*
+	 * XXX: continuation of rude stack hack
+	 */
+	if (nss > vm->vm_ssize)
+		vm->vm_ssize = nss;
+
+	return (0);
 }
 
 /*