From 1fe048505d64d473bc877004ba60183b06080be2 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Mon, 11 Nov 1996 20:39:03 +0000 Subject: [PATCH] Replaced I586_OPTIMIZED_BCOPY and I586_OPTIMIZED_BZERO with boot-time negative-logic flags (flags 0x01 and 0x02 for npx0, defaulting to unset = on). This changes the default from off to on. The options have been in current for several months with no problems reported. Added a boot-time negative-logic flag for the old I586_FAST_BCOPY option which went away too soon (flag 0x04 for npx0, defaulting to unset = on). Added a boot-time way to set the memory size (iosiz in config, iosize in userconfig for npx0). LINT: Removed old options. Documented npx0's flags and iosiz. options.i386: Removed old options. identcpu.c: Don't set the function pointers here. Setting them has to be delayed until after userconfig has had a chance to disable them and until after a good npx0 has been detected. machdep.c: Use npx0's iosize instead of MAXMEM if it is nonzero. support.s: Added vectors and glue code for copyin() and copyout(). Fixed ifdefs for i586_bzero(). Added ifdefs for i586_bcopy(). npx.c: Set the function pointers here. Clear hw_float when an npx exists but is too broken to use. Restored style from a year or three ago in npxattach(). 
--- sys/amd64/amd64/fpu.c | 60 +++++++++++---- sys/amd64/amd64/identcpu.c | 14 +--- sys/amd64/amd64/machdep.c | 7 +- sys/amd64/amd64/support.S | 145 ++++++++++++++++++++++++++++--------- sys/amd64/amd64/support.s | 145 ++++++++++++++++++++++++++++--------- sys/amd64/isa/npx.c | 60 +++++++++++---- sys/conf/NOTES | 44 +++++++---- sys/conf/options.i386 | 5 +- sys/i386/conf/LINT | 44 +++++++---- sys/i386/conf/NOTES | 44 +++++++---- sys/i386/conf/options.i386 | 5 +- sys/i386/i386/identcpu.c | 14 +--- sys/i386/i386/machdep.c | 7 +- sys/i386/i386/support.s | 145 ++++++++++++++++++++++++++++--------- sys/i386/isa/npx.c | 60 +++++++++++---- 15 files changed, 577 insertions(+), 222 deletions(-) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 687ff738a4d0..25b75335b30e 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -32,12 +32,13 @@ * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 - * $Id: npx.c,v 1.30 1996/06/25 20:30:38 bde Exp $ + * $Id: npx.c,v 1.31 1996/09/06 23:07:53 phk Exp $ */ #include "npx.h" #if NNPX > 0 +#include "opt_cpu.h" #include "opt_math_emulate.h" #include @@ -66,6 +67,22 @@ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ +/* Configuration flags. */ +#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) +#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) +#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) + +/* XXX - should be in header file. 
*/ +extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); +extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); +extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); +extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); + +void i586_bcopy __P((const void *from, void *to, size_t len)); +void i586_bzero __P((void *buf, size_t len)); +int i586_copyin __P((const void *udaddr, void *kaddr, size_t len)); +int i586_copyout __P((const void *kaddr, void *udaddr, size_t len)); + #ifdef __GNUC__ #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) @@ -324,22 +341,39 @@ int npxattach(dvp) struct isa_device *dvp; { - if (npx_ex16) - printf("npx%d: INT 16 interface\n", dvp->id_unit); - else if (npx_irq13) - ; /* higher level has printed "irq 13" */ + /* The caller has printed "irq 13" for the npx_irq13 case. */ + if (!npx_irq13) { + printf("npx%d: ", dvp->id_unit); + if (npx_ex16) + printf("INT 16 interface\n"); #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) - else if (npx_exists) { - printf("npx%d: error reporting broken; using 387 emulator\n", - dvp->id_unit); - npx_exists = 0; - } else - printf("npx%d: 387 emulator\n",dvp->id_unit); + else if (npx_exists) { + printf("error reporting broken; using 387 emulator\n"); + hw_float = npx_exists = 0; + } else + printf("387 emulator\n"); #else - else - printf("npx%d: no 387 emulator in kernel!\n", dvp->id_unit); + else + printf("no 387 emulator in kernel!\n"); #endif + } npxinit(__INITIAL_NPXCW__); + +#ifdef I586_CPU + if (cpu_class == CPUCLASS_586 && npx_ex16) { + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { + bcopy_vector = i586_bcopy; + ovbcopy_vector = i586_bcopy; + } + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) + bzero = i586_bzero; + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { + copyin_vector = i586_copyin; + copyout_vector = i586_copyout; + } + } +#endif + return (1); /* XXX 
unused */ } diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index d0331ae85d7b..7a4eb8ca8529 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp - * $Id: identcpu.c,v 1.6 1996/10/09 18:30:08 bde Exp $ + * $Id: identcpu.c,v 1.7 1996/10/09 19:47:15 bde Exp $ */ #include "opt_cpu.h" @@ -56,12 +56,7 @@ #include /* XXX - should be in header file */ -extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); -extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); - void i486_bzero __P((void *buf, size_t len)); -void i586_bcopy __P((const void *from, void *to, size_t len)); -void i586_bzero __P((void *buf, size_t len)); void identifycpu(void); /* XXX should be in different header file */ void earlysetcpuclass(void); @@ -179,13 +174,6 @@ identifycpu(void) (i586_ctr_freq + 4999) / 1000000, ((i586_ctr_freq + 4999) / 10000) % 100); printf("586"); -#ifdef I586_OPTIMIZED_BCOPY - bcopy_vector = i586_bcopy; - ovbcopy_vector = i586_bcopy; -#endif -#ifdef I586_OPTIMIZED_BZERO - bzero = i586_bzero; -#endif break; #endif #if defined(I686_CPU) diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 049c86dac025..6082eec0f56b 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.209 1996/10/31 00:57:25 julian Exp $ + * $Id: machdep.c,v 1.210 1996/11/07 14:43:59 joerg Exp $ */ #include "npx.h" @@ -972,6 +972,7 @@ init386(first) unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; + struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; @@ -1175,6 +1176,10 @@ init386(first) Maxmem = MAXMEM/4; #endif + idp = find_isadev(isa_devtab_null, &npxdriver, 0); + if (idp != NULL && idp->id_msize != 0) + Maxmem = idp->id_msize / 4; + /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 5ea1334e8870..7f7173f52c97 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: support.s,v 1.41 1996/10/09 19:47:20 bde Exp $ + * $Id: support.s,v 1.42 1996/11/08 02:38:44 asami Exp $ */ #include "opt_cpu.h" @@ -52,6 +52,12 @@ _bcopy_vector: .globl _bzero _bzero: .long _generic_bzero + .globl _copyin_vector +_copyin_vector: + .long _generic_copyin + .globl _copyout_vector +_copyout_vector: + .long _generic_copyout .globl _ovbcopy_vector _ovbcopy_vector: .long _generic_bcopy @@ -184,7 +190,7 @@ do0: ret #endif -#if defined(I586_CPU) || defined(I686_CPU) +#ifdef I586_CPU ENTRY(i586_bzero) movl 4(%esp),%edx movl 8(%esp),%ecx @@ -324,7 +330,7 @@ intreg_i586_bzero: stosb popl %edi ret -#endif /* I586_CPU || I686_CPU */ +#endif /* I586_CPU */ /* fillw(pat, base, cnt) */ ENTRY(fillw) @@ -427,6 +433,7 @@ ENTRY(generic_bcopy) cld ret +#ifdef I586_CPU ENTRY(i586_bcopy) pushl %esi pushl %edi @@ -562,6 +569,7 @@ small_i586_bcopy: popl %esi cld ret +#endif /* I586_CPU */ /* * Note: memcpy does not support overlapping copies @@ -604,8 +612,12 @@ ENTRY(memcpy) * returns to *curpcb->onfault instead of the function. */ +/* copyout(from_kernel, to_user, len) */ +ENTRY(copyout) + MEXITCOUNT + jmp *_copyout_vector -ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ +ENTRY(generic_copyout) movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi @@ -691,23 +703,11 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ /* bcopy(%esi, %edi, %ebx) */ 3: movl %ebx,%ecx -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyout - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyout -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyout +#ifdef I586_CPU ALIGN_TEXT slow_copyout: -#endif /* I586_CPU */ +#endif shrl $2,%ecx cld rep @@ -736,8 +736,66 @@ copyout_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyout) + /* + * Duplicated from generic_copyout. Could be done a bit better. 
+ */ + movl _curpcb,%eax + movl $copyout_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + pushl %ebx + movl 16(%esp),%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + testl %ebx,%ebx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movl %edi,%eax + addl %ebx,%eax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. + */ + cmpl $VM_MAXUSER_ADDRESS,%eax + ja copyout_fault + + /* bcopy(%esi, %edi, %ebx) */ +3: + movl %ebx,%ecx + /* + * End of duplicated code. + */ + + cmpl $1024,%ecx + jb slow_copyout + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyout +#endif /* I586_CPU */ + /* copyin(from_user, to_kernel, len) */ ENTRY(copyin) + MEXITCOUNT + jmp *_copyin_vector + +ENTRY(generic_copyin) movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi @@ -755,23 +813,10 @@ ENTRY(copyin) cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyin - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyin -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyin - +#ifdef I586_CPU ALIGN_TEXT slow_copyin: -#endif /* I586_CPU */ +#endif movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -802,6 +847,40 @@ copyin_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyin) + /* + * Duplicated from generic_copyin. Could be done a bit better. 
+ */ + movl _curpcb,%eax + movl $copyin_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + movl 12(%esp),%esi /* caddr_t from */ + movl 16(%esp),%edi /* caddr_t to */ + movl 20(%esp),%ecx /* size_t len */ + + /* + * make sure address is valid + */ + movl %esi,%edx + addl %ecx,%edx + jc copyin_fault + cmpl $VM_MAXUSER_ADDRESS,%edx + ja copyin_fault + /* + * End of duplicated code. + */ + + cmpl $1024,%ecx + jb slow_copyin + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyin +#endif /* I586_CPU */ + #if defined(I586_CPU) /* fastmove(src, dst, len) src in %esi diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index 5ea1334e8870..7f7173f52c97 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: support.s,v 1.41 1996/10/09 19:47:20 bde Exp $ + * $Id: support.s,v 1.42 1996/11/08 02:38:44 asami Exp $ */ #include "opt_cpu.h" @@ -52,6 +52,12 @@ _bcopy_vector: .globl _bzero _bzero: .long _generic_bzero + .globl _copyin_vector +_copyin_vector: + .long _generic_copyin + .globl _copyout_vector +_copyout_vector: + .long _generic_copyout .globl _ovbcopy_vector _ovbcopy_vector: .long _generic_bcopy @@ -184,7 +190,7 @@ do0: ret #endif -#if defined(I586_CPU) || defined(I686_CPU) +#ifdef I586_CPU ENTRY(i586_bzero) movl 4(%esp),%edx movl 8(%esp),%ecx @@ -324,7 +330,7 @@ intreg_i586_bzero: stosb popl %edi ret -#endif /* I586_CPU || I686_CPU */ +#endif /* I586_CPU */ /* fillw(pat, base, cnt) */ ENTRY(fillw) @@ -427,6 +433,7 @@ ENTRY(generic_bcopy) cld ret +#ifdef I586_CPU ENTRY(i586_bcopy) pushl %esi pushl %edi @@ -562,6 +569,7 @@ small_i586_bcopy: popl %esi cld ret +#endif /* I586_CPU */ /* * Note: memcpy does not support overlapping copies @@ -604,8 +612,12 @@ ENTRY(memcpy) * returns to *curpcb->onfault instead of the function. 
*/ +/* copyout(from_kernel, to_user, len) */ +ENTRY(copyout) + MEXITCOUNT + jmp *_copyout_vector -ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ +ENTRY(generic_copyout) movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi @@ -691,23 +703,11 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ /* bcopy(%esi, %edi, %ebx) */ 3: movl %ebx,%ecx -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyout - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyout -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyout +#ifdef I586_CPU ALIGN_TEXT slow_copyout: -#endif /* I586_CPU */ +#endif shrl $2,%ecx cld rep @@ -736,8 +736,66 @@ copyout_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyout) + /* + * Duplicated from generic_copyout. Could be done a bit better. + */ + movl _curpcb,%eax + movl $copyout_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + pushl %ebx + movl 16(%esp),%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + testl %ebx,%ebx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movl %edi,%eax + addl %ebx,%eax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. + */ + cmpl $VM_MAXUSER_ADDRESS,%eax + ja copyout_fault + + /* bcopy(%esi, %edi, %ebx) */ +3: + movl %ebx,%ecx + /* + * End of duplicated code. 
+ */ + + cmpl $1024,%ecx + jb slow_copyout + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyout +#endif /* I586_CPU */ + /* copyin(from_user, to_kernel, len) */ ENTRY(copyin) + MEXITCOUNT + jmp *_copyin_vector + +ENTRY(generic_copyin) movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi @@ -755,23 +813,10 @@ ENTRY(copyin) cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyin - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyin -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyin - +#ifdef I586_CPU ALIGN_TEXT slow_copyin: -#endif /* I586_CPU */ +#endif movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -802,6 +847,40 @@ copyin_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyin) + /* + * Duplicated from generic_copyin. Could be done a bit better. + */ + movl _curpcb,%eax + movl $copyin_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + movl 12(%esp),%esi /* caddr_t from */ + movl 16(%esp),%edi /* caddr_t to */ + movl 20(%esp),%ecx /* size_t len */ + + /* + * make sure address is valid + */ + movl %esi,%edx + addl %ecx,%edx + jc copyin_fault + cmpl $VM_MAXUSER_ADDRESS,%edx + ja copyin_fault + /* + * End of duplicated code. + */ + + cmpl $1024,%ecx + jb slow_copyin + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyin +#endif /* I586_CPU */ + #if defined(I586_CPU) /* fastmove(src, dst, len) src in %esi diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c index 687ff738a4d0..25b75335b30e 100644 --- a/sys/amd64/isa/npx.c +++ b/sys/amd64/isa/npx.c @@ -32,12 +32,13 @@ * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 - * $Id: npx.c,v 1.30 1996/06/25 20:30:38 bde Exp $ + * $Id: npx.c,v 1.31 1996/09/06 23:07:53 phk Exp $ */ #include "npx.h" #if NNPX > 0 +#include "opt_cpu.h" #include "opt_math_emulate.h" #include @@ -66,6 +67,22 @@ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ +/* Configuration flags. */ +#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) +#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) +#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) + +/* XXX - should be in header file. */ +extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); +extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); +extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); +extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); + +void i586_bcopy __P((const void *from, void *to, size_t len)); +void i586_bzero __P((void *buf, size_t len)); +int i586_copyin __P((const void *udaddr, void *kaddr, size_t len)); +int i586_copyout __P((const void *kaddr, void *udaddr, size_t len)); + #ifdef __GNUC__ #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) @@ -324,22 +341,39 @@ int npxattach(dvp) struct isa_device *dvp; { - if (npx_ex16) - printf("npx%d: INT 16 interface\n", dvp->id_unit); - else if (npx_irq13) - ; /* higher level has printed "irq 13" */ + /* The caller has printed "irq 13" for the npx_irq13 case. 
*/ + if (!npx_irq13) { + printf("npx%d: ", dvp->id_unit); + if (npx_ex16) + printf("INT 16 interface\n"); #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) - else if (npx_exists) { - printf("npx%d: error reporting broken; using 387 emulator\n", - dvp->id_unit); - npx_exists = 0; - } else - printf("npx%d: 387 emulator\n",dvp->id_unit); + else if (npx_exists) { + printf("error reporting broken; using 387 emulator\n"); + hw_float = npx_exists = 0; + } else + printf("387 emulator\n"); #else - else - printf("npx%d: no 387 emulator in kernel!\n", dvp->id_unit); + else + printf("no 387 emulator in kernel!\n"); #endif + } npxinit(__INITIAL_NPXCW__); + +#ifdef I586_CPU + if (cpu_class == CPUCLASS_586 && npx_ex16) { + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { + bcopy_vector = i586_bcopy; + ovbcopy_vector = i586_bcopy; + } + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) + bzero = i586_bzero; + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { + copyin_vector = i586_copyin; + copyout_vector = i586_copyout; + } + } +#endif + return (1); /* XXX unused */ } diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 1f8587b7b89d..fb98ebbab7a5 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.288 1996/11/08 02:38:35 asami Exp $ +# $Id: LINT,v 1.289 1996/11/11 05:17:34 gibbs Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -72,20 +72,6 @@ options GPL_MATH_EMULATE #Support for x87 emulation via # and choose functionality over speed (on the widest variety of systems). options FAILSAFE - -# When this is set, bcopy() will use an unrolled loop using -# floating point registers. It makes bcopy() run 30-100% -# faster on Pentiums and makes no difference or slower on non-Pentium -# (including P6) systems. 
-# -# However, even when this is set, there is a run-time check for CPU -# type so the routine will never be invoked for a wrong architecture. -# It will become the default after some testing period. -options "I586_OPTIMIZED_BCOPY" - -# This is like I586_OPTIMIZED_BCOPY except it controls bzero(). -options "I586_OPTIMIZED_BZERO" - # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' @@ -517,8 +503,34 @@ options XT_KEYBOARD # extra initialization for XT keyboard # coprocessor or enable math emulation. If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT +device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr + +# +# `flags' for npx0: +# 0x01 don't use the npx registers to optimize bcopy +# 0x02 don't use the npx registers to optimize bzero +# 0x04 don't use the npx registers to optimize copyin or copyout. +# The npx registers are normally used to optimize copying and zeroing when +# all of the following conditions are satisfied: +# "I586_CPU" is an option +# the cpu is an i586 (perhaps not a Pentium) +# the probe for npx0 succeeds +# INT 16 exception handling works. +# Then copying and zeroing using the npx registers is normally 30-100% faster. +# The flags can be used to control cases where it doesn't work or is slower. +# Setting them at boot time using userconfig works right (the optimizations +# are not used until later in the bootstrap when npx0 is attached). +# + +# +# `iosiz' for npx0: +# This can be used instead of the MAXMEM option to set the memory size. If +# it is nonzero, then it overrides both the MAXMEM option and the memory +# size reported by the BIOS. Setting it at boot time using userconfig takes +# effect on the next reboot after the change has been recorded in the kernel +# binary (the size is used early in the boot before userconfig has a chance +# to change it). 
# -device npx0 at isa? port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index ae730522a26b..6d6e11f9a198 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -1,4 +1,4 @@ -# $Id: options.i386,v 1.27 1996/11/02 02:25:22 pst Exp $ +# $Id: options.i386,v 1.28 1996/11/08 02:38:36 asami Exp $ BOUNCEPAGES opt_bounce.h USER_LDT MATH_EMULATE opt_math_emulate.h @@ -42,9 +42,6 @@ I486_CPU opt_cpu.h I586_CPU opt_cpu.h I686_CPU opt_cpu.h -I586_OPTIMIZED_BCOPY opt_temporary.h -I586_OPTIMIZED_BZERO opt_temporary.h - SC_SPLASH_SCREEN opt_syscons.h MAXCONS opt_syscons.h SLOW_VGA opt_syscons.h diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index 1f8587b7b89d..fb98ebbab7a5 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.288 1996/11/08 02:38:35 asami Exp $ +# $Id: LINT,v 1.289 1996/11/11 05:17:34 gibbs Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -72,20 +72,6 @@ options GPL_MATH_EMULATE #Support for x87 emulation via # and choose functionality over speed (on the widest variety of systems). options FAILSAFE - -# When this is set, bcopy() will use an unrolled loop using -# floating point registers. It makes bcopy() run 30-100% -# faster on Pentiums and makes no difference or slower on non-Pentium -# (including P6) systems. -# -# However, even when this is set, there is a run-time check for CPU -# type so the routine will never be invoked for a wrong architecture. -# It will become the default after some testing period. -options "I586_OPTIMIZED_BCOPY" - -# This is like I586_OPTIMIZED_BCOPY except it controls bzero(). 
-options "I586_OPTIMIZED_BZERO" - # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' @@ -517,8 +503,34 @@ options XT_KEYBOARD # extra initialization for XT keyboard # coprocessor or enable math emulation. If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT +device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr + +# +# `flags' for npx0: +# 0x01 don't use the npx registers to optimize bcopy +# 0x02 don't use the npx registers to optimize bzero +# 0x04 don't use the npx registers to optimize copyin or copyout. +# The npx registers are normally used to optimize copying and zeroing when +# all of the following conditions are satisfied: +# "I586_CPU" is an option +# the cpu is an i586 (perhaps not a Pentium) +# the probe for npx0 succeeds +# INT 16 exception handling works. +# Then copying and zeroing using the npx registers is normally 30-100% faster. +# The flags can be used to control cases where it doesn't work or is slower. +# Setting them at boot time using userconfig works right (the optimizations +# are not used until later in the bootstrap when npx0 is attached). +# + +# +# `iosiz' for npx0: +# This can be used instead of the MAXMEM option to set the memory size. If +# it is nonzero, then it overrides both the MAXMEM option and the memory +# size reported by the BIOS. Setting it at boot time using userconfig takes +# effect on the next reboot after the change has been recorded in the kernel +# binary (the size is used early in the boot before userconfig has a chance +# to change it). # -device npx0 at isa? 
port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index 1f8587b7b89d..fb98ebbab7a5 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -2,7 +2,7 @@ # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.288 1996/11/08 02:38:35 asami Exp $ +# $Id: LINT,v 1.289 1996/11/11 05:17:34 gibbs Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from @@ -72,20 +72,6 @@ options GPL_MATH_EMULATE #Support for x87 emulation via # and choose functionality over speed (on the widest variety of systems). options FAILSAFE - -# When this is set, bcopy() will use an unrolled loop using -# floating point registers. It makes bcopy() run 30-100% -# faster on Pentiums and makes no difference or slower on non-Pentium -# (including P6) systems. -# -# However, even when this is set, there is a run-time check for CPU -# type so the routine will never be invoked for a wrong architecture. -# It will become the default after some testing period. -options "I586_OPTIMIZED_BCOPY" - -# This is like I586_OPTIMIZED_BCOPY except it controls bzero(). -options "I586_OPTIMIZED_BZERO" - # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' @@ -517,8 +503,34 @@ options XT_KEYBOARD # extra initialization for XT keyboard # coprocessor or enable math emulation. If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT +device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr + +# +# `flags' for npx0: +# 0x01 don't use the npx registers to optimize bcopy +# 0x02 don't use the npx registers to optimize bzero +# 0x04 don't use the npx registers to optimize copyin or copyout. 
+# The npx registers are normally used to optimize copying and zeroing when +# all of the following conditions are satisfied: +# "I586_CPU" is an option +# the cpu is an i586 (perhaps not a Pentium) +# the probe for npx0 succeeds +# INT 16 exception handling works. +# Then copying and zeroing using the npx registers is normally 30-100% faster. +# The flags can be used to control cases where it doesn't work or is slower. +# Setting them at boot time using userconfig works right (the optimizations +# are not used until later in the bootstrap when npx0 is attached). +# + +# +# `iosiz' for npx0: +# This can be used instead of the MAXMEM option to set the memory size. If +# it is nonzero, then it overrides both the MAXMEM option and the memory +# size reported by the BIOS. Setting it at boot time using userconfig takes +# effect on the next reboot after the change has been recorded in the kernel +# binary (the size is used early in the boot before userconfig has a chance +# to change it). # -device npx0 at isa? port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: diff --git a/sys/i386/conf/options.i386 b/sys/i386/conf/options.i386 index ae730522a26b..6d6e11f9a198 100644 --- a/sys/i386/conf/options.i386 +++ b/sys/i386/conf/options.i386 @@ -1,4 +1,4 @@ -# $Id: options.i386,v 1.27 1996/11/02 02:25:22 pst Exp $ +# $Id: options.i386,v 1.28 1996/11/08 02:38:36 asami Exp $ BOUNCEPAGES opt_bounce.h USER_LDT MATH_EMULATE opt_math_emulate.h @@ -42,9 +42,6 @@ I486_CPU opt_cpu.h I586_CPU opt_cpu.h I686_CPU opt_cpu.h -I586_OPTIMIZED_BCOPY opt_temporary.h -I586_OPTIMIZED_BZERO opt_temporary.h - SC_SPLASH_SCREEN opt_syscons.h MAXCONS opt_syscons.h SLOW_VGA opt_syscons.h diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index d0331ae85d7b..7a4eb8ca8529 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp - * $Id: identcpu.c,v 1.6 1996/10/09 18:30:08 bde Exp $ + * $Id: identcpu.c,v 1.7 1996/10/09 19:47:15 bde Exp $ */ #include "opt_cpu.h" @@ -56,12 +56,7 @@ #include /* XXX - should be in header file */ -extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); -extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); - void i486_bzero __P((void *buf, size_t len)); -void i586_bcopy __P((const void *from, void *to, size_t len)); -void i586_bzero __P((void *buf, size_t len)); void identifycpu(void); /* XXX should be in different header file */ void earlysetcpuclass(void); @@ -179,13 +174,6 @@ identifycpu(void) (i586_ctr_freq + 4999) / 1000000, ((i586_ctr_freq + 4999) / 10000) % 100); printf("586"); -#ifdef I586_OPTIMIZED_BCOPY - bcopy_vector = i586_bcopy; - ovbcopy_vector = i586_bcopy; -#endif -#ifdef I586_OPTIMIZED_BZERO - bzero = i586_bzero; -#endif break; #endif #if defined(I686_CPU) diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 049c86dac025..6082eec0f56b 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.209 1996/10/31 00:57:25 julian Exp $ + * $Id: machdep.c,v 1.210 1996/11/07 14:43:59 joerg Exp $ */ #include "npx.h" @@ -972,6 +972,7 @@ init386(first) unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; int gsel_tss; + struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; int pagesinbase, pagesinext; @@ -1175,6 +1176,10 @@ init386(first) Maxmem = MAXMEM/4; #endif + idp = find_isadev(isa_devtab_null, &npxdriver, 0); + if (idp != NULL && idp->id_msize != 0) + Maxmem = idp->id_msize / 4; + /* call pmap initialization to make new kernel address space */ pmap_bootstrap (first, 0); diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index 5ea1334e8870..7f7173f52c97 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: support.s,v 1.41 1996/10/09 19:47:20 bde Exp $ + * $Id: support.s,v 1.42 1996/11/08 02:38:44 asami Exp $ */ #include "opt_cpu.h" @@ -52,6 +52,12 @@ _bcopy_vector: .globl _bzero _bzero: .long _generic_bzero + .globl _copyin_vector +_copyin_vector: + .long _generic_copyin + .globl _copyout_vector +_copyout_vector: + .long _generic_copyout .globl _ovbcopy_vector _ovbcopy_vector: .long _generic_bcopy @@ -184,7 +190,7 @@ do0: ret #endif -#if defined(I586_CPU) || defined(I686_CPU) +#ifdef I586_CPU ENTRY(i586_bzero) movl 4(%esp),%edx movl 8(%esp),%ecx @@ -324,7 +330,7 @@ intreg_i586_bzero: stosb popl %edi ret -#endif /* I586_CPU || I686_CPU */ +#endif /* I586_CPU */ /* fillw(pat, base, cnt) */ ENTRY(fillw) @@ -427,6 +433,7 @@ ENTRY(generic_bcopy) cld ret +#ifdef I586_CPU ENTRY(i586_bcopy) pushl %esi pushl %edi @@ -562,6 +569,7 @@ small_i586_bcopy: popl %esi cld ret +#endif /* I586_CPU */ /* * Note: memcpy does not support overlapping copies @@ -604,8 +612,12 @@ ENTRY(memcpy) * 
returns to *curpcb->onfault instead of the function. */ +/* copyout(from_kernel, to_user, len) */ +ENTRY(copyout) + MEXITCOUNT + jmp *_copyout_vector -ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ +ENTRY(generic_copyout) movl _curpcb,%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi @@ -691,23 +703,11 @@ ENTRY(copyout) /* copyout(from_kernel, to_user, len) */ /* bcopy(%esi, %edi, %ebx) */ 3: movl %ebx,%ecx -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyout - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyout -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyout +#ifdef I586_CPU ALIGN_TEXT slow_copyout: -#endif /* I586_CPU */ +#endif shrl $2,%ecx cld rep @@ -736,8 +736,66 @@ copyout_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyout) + /* + * Duplicated from generic_copyout. Could be done a bit better. + */ + movl _curpcb,%eax + movl $copyout_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + pushl %ebx + movl 16(%esp),%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + testl %ebx,%ebx /* anything to do? */ + jz done_copyout + + /* + * Check explicitly for non-user addresses. If 486 write protection + * is being used, this check is essential because we are in kernel + * mode so the h/w does not provide any protection against writing + * kernel addresses. + */ + + /* + * First, prevent address wrapping. + */ + movl %edi,%eax + addl %ebx,%eax + jc copyout_fault +/* + * XXX STOP USING VM_MAXUSER_ADDRESS. + * It is an end address, not a max, so every time it is used correctly it + * looks like there is an off by one error, and of course it caused an off + * by one error in several places. + */ + cmpl $VM_MAXUSER_ADDRESS,%eax + ja copyout_fault + + /* bcopy(%esi, %edi, %ebx) */ +3: + movl %ebx,%ecx + /* + * End of duplicated code. 
+ */ + + cmpl $1024,%ecx + jb slow_copyout + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyout +#endif /* I586_CPU */ + /* copyin(from_user, to_kernel, len) */ ENTRY(copyin) + MEXITCOUNT + jmp *_copyin_vector + +ENTRY(generic_copyin) movl _curpcb,%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi @@ -755,23 +813,10 @@ ENTRY(copyin) cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault -#if defined(I586_CPU) - cmpl $1024,%ecx - jb slow_copyin - -#if defined(I386_CPU) || defined(I486_CPU) || defined(I686_CPU) - cmpl $CPUCLASS_586,_cpu_class - jne slow_copyin -#endif /* I386_CPU || I486_CPU || I686_CPU */ - - pushl %ecx - call _fastmove - addl $4,%esp - jmp done_copyin - +#ifdef I586_CPU ALIGN_TEXT slow_copyin: -#endif /* I586_CPU */ +#endif movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld @@ -802,6 +847,40 @@ copyin_fault: movl $EFAULT,%eax ret +#ifdef I586_CPU +ENTRY(i586_copyin) + /* + * Duplicated from generic_copyin. Could be done a bit better. + */ + movl _curpcb,%eax + movl $copyin_fault,PCB_ONFAULT(%eax) + pushl %esi + pushl %edi + movl 12(%esp),%esi /* caddr_t from */ + movl 16(%esp),%edi /* caddr_t to */ + movl 20(%esp),%ecx /* size_t len */ + + /* + * make sure address is valid + */ + movl %esi,%edx + addl %ecx,%edx + jc copyin_fault + cmpl $VM_MAXUSER_ADDRESS,%edx + ja copyin_fault + /* + * End of duplicated code. + */ + + cmpl $1024,%ecx + jb slow_copyin + + pushl %ecx + call _fastmove + addl $4,%esp + jmp done_copyin +#endif /* I586_CPU */ + #if defined(I586_CPU) /* fastmove(src, dst, len) src in %esi diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 687ff738a4d0..25b75335b30e 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -32,12 +32,13 @@ * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 - * $Id: npx.c,v 1.30 1996/06/25 20:30:38 bde Exp $ + * $Id: npx.c,v 1.31 1996/09/06 23:07:53 phk Exp $ */ #include "npx.h" #if NNPX > 0 +#include "opt_cpu.h" #include "opt_math_emulate.h" #include @@ -66,6 +67,22 @@ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ +/* Configuration flags. */ +#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) +#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) +#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) + +/* XXX - should be in header file. */ +extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); +extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); +extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); +extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); + +void i586_bcopy __P((const void *from, void *to, size_t len)); +void i586_bzero __P((void *buf, size_t len)); +int i586_copyin __P((const void *udaddr, void *kaddr, size_t len)); +int i586_copyout __P((const void *kaddr, void *udaddr, size_t len)); + #ifdef __GNUC__ #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) @@ -324,22 +341,39 @@ int npxattach(dvp) struct isa_device *dvp; { - if (npx_ex16) - printf("npx%d: INT 16 interface\n", dvp->id_unit); - else if (npx_irq13) - ; /* higher level has printed "irq 13" */ + /* The caller has printed "irq 13" for the npx_irq13 case. 
*/ + if (!npx_irq13) { + printf("npx%d: ", dvp->id_unit); + if (npx_ex16) + printf("INT 16 interface\n"); #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) - else if (npx_exists) { - printf("npx%d: error reporting broken; using 387 emulator\n", - dvp->id_unit); - npx_exists = 0; - } else - printf("npx%d: 387 emulator\n",dvp->id_unit); + else if (npx_exists) { + printf("error reporting broken; using 387 emulator\n"); + hw_float = npx_exists = 0; + } else + printf("387 emulator\n"); #else - else - printf("npx%d: no 387 emulator in kernel!\n", dvp->id_unit); + else + printf("no 387 emulator in kernel!\n"); #endif + } npxinit(__INITIAL_NPXCW__); + +#ifdef I586_CPU + if (cpu_class == CPUCLASS_586 && npx_ex16) { + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { + bcopy_vector = i586_bcopy; + ovbcopy_vector = i586_bcopy; + } + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) + bzero = i586_bzero; + if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { + copyin_vector = i586_copyin; + copyout_vector = i586_copyout; + } + } +#endif + return (1); /* XXX unused */ }