freebsd-nq/sys/gnu/i386/fpemul/reg_round.s
Bruce Evans 61b9b2965c The change to do a longword compare in the previous commit just broke an
apparently-intended micro-optimization ("testb" is equivalent and smaller)
and added a style bug (the size suffix for "testl" was missing).
linux-2.3.35 already had the correct fix.
2000-06-03 11:09:09 +00:00

651 lines
16 KiB
ArmAsm

.file "reg_round.S"
/*
* reg_round.S
*
* Rounding/truncation/etc for FPU basic arithmetic functions.
*
* This code has four possible entry points.
* The following must be entered by a jmp instruction:
* FPU_round, FPU_round_sqrt, and FPU_Arith_exit.
*
* The _round_reg entry point is intended to be used by C code.
* From C, call as:
* void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
*
*
* Copyright (C) 1992,1993,1994
* W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
* Australia. E-mail billm@vaxc.cc.monash.edu.au
* All rights reserved.
*
* This copyright notice covers the redistribution and use of the
* FPU emulator developed by W. Metzenthen. It covers only its use
* in the 386BSD, FreeBSD and NetBSD operating systems. Any other
* use is not permitted under this copyright.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must include information specifying
* that source code for the emulator is freely available and include
* either:
* a) an offer to provide the source code for a nominal distribution
* fee, or
* b) list at least two alternative methods whereby the source
* can be obtained, e.g. a publically accessible bulletin board
* and an anonymous ftp site from which the software can be
* downloaded.
* 3. All advertising materials specifically mentioning features or use of
* this emulator must acknowledge that it was developed by W. Metzenthen.
* 4. The name of W. Metzenthen may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* The purpose of this copyright, based upon the Berkeley copyright, is to
* ensure that the covered software remains freely available to everyone.
*
* The software (with necessary differences) is also available, but under
* the terms of the GNU copyleft, for the Linux operating system and for
* the djgpp ms-dos extender.
*
* W. Metzenthen June 1994.
*
*
* $FreeBSD$
*
*/
/*---------------------------------------------------------------------------+
| Four entry points. |
| |
| Needed by both the FPU_round and FPU_round_sqrt entry points: |
| %eax:%ebx 64 bit significand |
| %edx 32 bit extension of the significand |
| %edi pointer to an FPU_REG for the result to be stored |
| stack calling function must have set up a C stack frame and |
| pushed %esi, %edi, and %ebx |
| |
| Needed just for the FPU_round_sqrt entry point: |
| %cx A control word in the same format as the FPU control word. |
| Otherwise, PARAM4 must give such a value. |
| |
| |
| The significand and its extension are assumed to be exact in the |
| following sense: |
| If the significand by itself is the exact result then the significand |
| extension (%edx) must contain 0, otherwise the significand extension |
| must be non-zero. |
| If the significand extension is non-zero then the significand is |
| smaller than the magnitude of the correct exact result by an amount |
| greater than zero and less than one ls bit of the significand. |
| The significand extension is only required to have three possible |
| non-zero values: |
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
| smaller than the magnitude of the true |
| exact result. |
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| The code in this module has become quite complex, but it should handle |
| all of the FPU flags which are set at this stage of the basic arithmetic |
| computations. |
| There are a few rare cases where the results are not set identically to |
| a real FPU. These require a bit more thought because at this stage the |
| results of the code here appear to be more consistent... |
| This may be changed in a future version. |
+---------------------------------------------------------------------------*/
#include <gnu/i386/fpemul/exception.h>
#include <gnu/i386/fpemul/control_w.h>
/* Values stored in FPU_bits_lost: LOST_DOWN is written on the truncation
 * paths, LOST_UP on the paths which increment the significand. */
#define LOST_DOWN $1
#define LOST_UP $2
/* Values stored in FPU_denormal: DENORMAL is written when the significand
 * is shifted right to form a denormal, UNMASKED_UNDERFLOW when the
 * underflow exception is unmasked and the exponent is biased instead. */
#define DENORMAL $1
#define UNMASKED_UNDERFLOW $2
.data
ALIGN_DATA
/* 0 until a rounding path stores LOST_DOWN or LOST_UP; tested after
 * rounding to select which precision flag routine is called. */
FPU_bits_lost:
.byte 0
/* 0 when the exponent was above EXP_UNDER on entry, otherwise DENORMAL or
 * UNMASKED_UNDERFLOW; tested at LRe_normalise. */
FPU_denormal:
.byte 0
.text
/* Entry points which are reached by a jmp from other modules. */
.globl FPU_round
.globl FPU_round_sqrt
.globl FPU_Arith_exit
/*
 * C-callable entry point:
 *   void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 * Builds the stack frame which FPU_Arith_exit later tears down (%ebp, then
 * %esi, %edi, %ebx), loads the registers expected by FPU_round_sqrt and
 * jumps there:
 *   %edi      = arg
 *   %eax:%ebx = significand of *arg
 *   %edx      = extent (significand extension)
 *   %ecx      = control_w
 */
ENTRY(round_reg)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	PARAM3,%ecx		/* control word */
	movl	PARAM2,%edx		/* significand extension */
	movl	PARAM1,%edi		/* destination FPU_REG */
	movl	SIGL(%edi),%ebx		/* ls 32 bits of the significand */
	movl	SIGH(%edi),%eax		/* ms 32 bits of the significand */
	jmp	FPU_round_sqrt
/*
 * FPU_round:      entered by jmp; fetches the control word from PARAM4.
 * FPU_round_sqrt: entered by jmp with the control word already in %ecx.
 *
 * In:  %eax:%ebx significand, %edx extension, %edi -> result FPU_REG.
 *
 * If the exponent is at or below EXP_UNDER the number is first handed to
 * xMake_denorm, which returns to xDenorm_done.  The control word is then
 * copied to %esi (the rounding-control field is extracted from it later)
 * and the precision-control field selects the rounding routine.
 */
FPU_round:				/* Normal entry point */
	movl	PARAM4,%ecx
FPU_round_sqrt:				/* Entry point from wm_sqrt.S */
#ifdef PARANOID
/* Cannot use this here yet */
/* orl %eax,%eax */
/* jns L_entry_bugged */
#endif /* PARANOID */
	cmpl	EXP_UNDER,EXP(%edi)
	jle	xMake_denorm		/* The number is a de-normal */
	movb	$0,FPU_denormal		/* 0 -> not a de-normal */
xDenorm_done:
	movb	$0,FPU_bits_lost	/* No bits yet lost in rounding */
	movl	%ecx,%esi		/* Keep the full control word */
	andl	CW_PC,%ecx		/* Isolate precision control */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64
	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53
	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24
	/* Without PARANOID an unrecognised precision-control value falls
	 * through into the code which follows this dispatch. */
#ifdef PARANOID
	jmp	L_bugged	/* There is no bug, just a bad control word */
#endif /* PARANOID */
/*
 * Round etc to 24 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> result FPU_REG (only its sign is read here).
 * Out: %eax holds the result in its upper 24 bits and %ebx is cleared when
 *      any bits were discarded; FPU_bits_lost records LOST_UP or LOST_DOWN.
 * Uses %ecx as scratch.  Leaves via LCheck_Round_Overflow after an
 * increment, otherwise via LRe_normalise.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24
	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24
	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24
	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24
	/* Without PARANOID an unmatched value falls through to LUp_24. */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */
LUp_24:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_24	/* If negative then up==truncate */
	jmp	LCheck_24_round_up
LDown_24:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_24	/* If positive then down==truncate */
LCheck_24_round_up:
	/* Directed rounding away from zero: increment if any bit below
	 * the 24th is set. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	LRe_normalise		/* Exact, nothing to do */
LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */
	jne	LGreater_Half_24	/* greater than half, increment needed */
	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */
	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */
	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24
LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax	/* Carry out is tested at the target */
	jmp	LCheck_Round_Overflow
LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */
LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	LRe_normalise
/*
 * Round etc to 53 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> result FPU_REG (only its sign is read here).
 * Out: the low 11 bits of %ebx are cleared when any bits were discarded;
 *      FPU_bits_lost records LOST_UP or LOST_DOWN.
 * Uses %ecx as scratch.  Leaves via LCheck_Round_Overflow after an
 * increment, otherwise via LRe_normalise.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53
	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53
	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53
	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53
	/* Without PARANOID an unmatched value falls through to LUp_53. */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */
LUp_53:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_53	/* If negative then up==truncate */
	jmp	LCheck_53_round_up
LDown_53:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_53	/* If positive then down==truncate */
LCheck_53_round_up:
	/* Directed rounding away from zero: increment if any bit below
	 * the 53rd is set. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	LRe_normalise		/* Exact, nothing to do */
LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */
	jnz	LGreater_Half_53	/* greater than half, increment needed */
	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */
	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53
LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax			/* Carry out is tested at the target */
	jmp	LCheck_Round_Overflow
LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	LRe_normalise		/* No truncation needed */
LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	LRe_normalise
/*
 * Round etc to 64 bit precision.
 *
 * In:  %eax:%ebx significand, %edx extension, %esi control word,
 *      %edi -> result FPU_REG.
 * Only the extension decides the rounding here; the significand itself is
 * changed only by the increment at LDo_64_round_up.
 *
 * LCheck_Round_Overflow is shared: the 24 and 53 bit paths jump to it with
 * the carry flag from their own increment still live.
 * LTruncate_64 falls through into LRe_normalise, which follows this code.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate rounding control */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64
	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64
	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64
	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64
	/* Without PARANOID an unmatched value falls through to LUp_64. */
#ifdef PARANOID
	jmp	L_bugged
#endif /* PARANOID */
LUp_64:
	cmpb	SIGN_POS,SIGN(%edi)
	jne	LCheck_truncate_64	/* If negative then up==truncate */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise		/* Exact, nothing to do */
LDown_64:
	cmpb	SIGN_POS,SIGN(%edi)
	je	LCheck_truncate_64	/* If positive then down==truncate */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	LRe_normalise		/* Exact, nothing to do */
LRound_nearest_64:
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half */
	jne	LDo_64_round_up		/* greater than half */
	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64
LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax
LCheck_Round_Overflow:
	jnc	LRe_normalise		/* Rounding done, no overflow */
	/* Overflow, adjust the result (to 1.0): rotate the carry back in
	 * as the ms bit and bump the exponent. */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incl	EXP(%edi)
	jmp	LRe_normalise
LCheck_truncate_64:
	orl	%edx,%edx
	jz	LRe_normalise		/* Extension is zero: result is exact */
LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
/* Common tail of every rounding path.
// In: %eax:%ebx rounded significand, %edi -> result FPU_REG.
// A non-zero FPU_denormal means the entry code altered the number, so
// xNormalise_result is given the chance to undo that first. */
LRe_normalise:
testb $0xff,FPU_denormal
jnz xNormalise_result
/* Report any precision loss recorded by the rounding code; both flag
// routines come back to xL_no_precision_loss. */
xL_Normalised:
cmpb LOST_UP,FPU_bits_lost
je xL_precision_lost_up
cmpb LOST_DOWN,FPU_bits_lost
je xL_precision_lost_down
xL_no_precision_loss:
cmpl EXP_OVER,EXP(%edi)
jge L_overflow /* exponent too large, nothing is stored here */
/* store the result */
movb TW_Valid,TAG(%edi)
/* Also entered from L_underflow_to_zero, which sets the tag itself. */
xL_Store_significand:
movl %eax,SIGH(%edi)
movl %ebx,SIGL(%edi)
/* Restore the registers pushed by the caller's frame set-up (the reverse
// of the push order used in round_reg) and return to the C caller. */
FPU_Arith_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
/* Set the FPU status flags to represent precision loss due to
// round-up.  %eax (ms half of the significand) is saved around the
// call; control then returns to the overflow check and store. */
xL_precision_lost_up:
	pushl	%eax
	call	_set_precision_flag_up
	popl	%eax
	jmp	xL_no_precision_loss
/* Set the FPU status flags to represent precision loss due to
// truncation.  %eax is saved around the call as above. */
xL_precision_lost_down:
	pushl	%eax
	call	_set_precision_flag_down
	popl	%eax
	jmp	xL_no_precision_loss
/* The number is a denormal (which might get rounded up to a normal)
// Shift the number right the required number of bits, which will
// have to be undone later...
//
// Reached from the entry code when EXP(%edi) <= EXP_UNDER.
// In:  %ecx control word, %eax:%ebx significand, %edx extension.
// Out: significand and extension shifted right, exponent raised by the
//      shift count, %ecx restored; continues at xDenorm_done. */
xMake_denorm:
/* The action to be taken depends upon whether the underflow
// exception is masked*/
testb CW_Underflow,%cl /* Underflow mask.*/
jz xUnmasked_underflow /* Do not make a denormal.*/
movb DENORMAL,FPU_denormal
pushl %ecx /* Save the control word, %ecx becomes the shift count */
movl EXP(%edi),%ecx
subl EXP_UNDER+1,%ecx
negl %ecx /* %ecx = (EXP_UNDER+1) - exponent, at least 1 */
cmpl $64,%ecx /* 64 or more: the whole significand is shifted out */
jnc xDenorm_shift_more_than_63
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc xDenorm_shift_more_than_32
/* We got here without jumps by assuming that the most common requirement
// is for a small de-normalising shift.
// Shift by [1..31] bits */
addl %ecx,EXP(%edi)
orl %edx,%edx /* extension*/
setne %ch /* remember whether the old extension was non-zero */
xorl %edx,%edx
shrd %cl,%ebx,%edx /* bits leaving %ebx form the new extension */
shrd %cl,%eax,%ebx
shr %cl,%eax
orb %ch,%dl /* keep the old extension as a sticky ls bit */
popl %ecx /* Restore the control word */
jmp xDenorm_done
/* Shift by [32..63] bits
// In:  %ecx shift count, %eax:%ebx significand, %edx extension; the
//      control word is on the stack (pushed by xMake_denorm).
// The shift is done as (count - 32) bits followed by a move of each
// register down one position.  Any non-zero bits which drop off the end
// are folded into the ls bit of what becomes the new extension. */
xDenorm_shift_more_than_32:
addl %ecx,EXP(%edi)
subb $32,%cl /* remaining shift, 0..31 */
orl %edx,%edx
setne %ch /* old extension non-zero? */
orb %ch,%bl
xorl %edx,%edx
shrd %cl,%ebx,%edx /* collect the bits shifted out of %ebx */
shrd %cl,%eax,%ebx
shr %cl,%eax
orl %edx,%edx /*test these 32 bits*/
setne %cl
orb %ch,%bl /* sticky: old extension */
orb %cl,%bl /* sticky: bits shifted out of %ebx */
movl %ebx,%edx /* now move everything down by 32 bits */
movl %eax,%ebx
xorl %eax,%eax
popl %ecx /* Restore the control word */
jmp xDenorm_done
/* Shift by [64..) bits
// In:  %ecx shift count (64 or more), %eax:%ebx significand, %edx
//      extension; the control word is on the stack.
// Counts above 64 are passed on to xDenorm_shift_more_than_64. */
xDenorm_shift_more_than_63:
cmpl $64,%ecx
jne xDenorm_shift_more_than_64
/* Exactly 64 bit shift: the old ms word becomes the extension and the
// significand becomes zero. */
addl %ecx,EXP(%edi)
xorl %ecx,%ecx
orl %edx,%edx
setne %cl /* old extension non-zero? */
orl %ebx,%ebx
setne %ch /* old ls word non-zero? */
orb %ch,%cl
orb %cl,%al /* fold both into a sticky ls bit */
movl %eax,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx /* Restore the control word */
jmp xDenorm_done
/* Shift by more than 64 bits: the exponent is set directly to EXP_UNDER+1,
// the significand is cleared and the extension is set to 1.
// The control word is on the stack (pushed by xMake_denorm). */
xDenorm_shift_more_than_64:
movl EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so..*/
movl $1,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx /* Restore the control word */
jmp xDenorm_done
/* Underflow exception is unmasked: the significand is left alone and only
// the exponent is adjusted.  FPU_denormal is set to UNMASKED_UNDERFLOW so
// that xNormalise_result goes to xSignal_underflow after rounding.
// %ecx was not pushed on this path, so nothing is popped. */
xUnmasked_underflow:
/* Increase the exponent by the magic number*/
addl $(3*(1<<13)),EXP(%edi)
movb UNMASKED_UNDERFLOW,FPU_denormal
jmp xDenorm_done
/* Undo the de-normalisation.
// Reached from LRe_normalise when FPU_denormal is non-zero.
// In:  %eax:%ebx rounded significand, %edi -> result FPU_REG.
// Out: significand shifted left until its ms bit is set, exponent reduced
//      by the shift count; continues at xL_Normalised.
// Uses %ecx as scratch. */
xNormalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	xSignal_underflow
/* The number must be a denormal if we got here.*/
#ifdef PARANOID
/* But check it... just in case.*/
	cmpl	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */
	orl	%eax,%eax		/* ms bits */
	jnz	LNormalise_shift_up_to_31 /* Shift left 0 - 31 bits */
	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */
/* Shift left 32 - 63 bits: move the ls word up first, then share the
// code for the remaining 0 - 31 bits. */
	movl	%ebx,%eax
	xorl	%ebx,%ebx
	subl	$32,EXP(%edi)
LNormalise_shift_up_to_31:
	bsrl	%eax,%ecx	/* get the required shift in %ecx */
	subl	$31,%ecx
	negl	%ecx		/* %ecx = 31 - index of the ms set bit */
	shldl	%cl,%ebx,%eax
	shll	%cl,%ebx
	subl	%ecx,EXP(%edi)
LNormalise_shift_done:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	jz	xL_Normalised
/* There must be a masked underflow.
// NOTE(review): this site calls _exception directly while
// xSignal_underflow goes through the EXCEPTION macro; confirm that both
// resolve to the same symbol. */
	pushl	%eax			/* Save the ms word */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms word */
	jmp	xL_Normalised
/* The operations resulted in a number too small to represent.
// Masked response.
// Reached from xNormalise_result when %eax and %ebx are both zero.  The
// precision (down) flag routine and the underflow exception routine are
// both called, the result is tagged TW_Zero and the zero significand is
// stored by the common exit code.
// NOTE(review): this site calls _exception directly while
// xSignal_underflow goes through the EXCEPTION macro; confirm that both
// resolve to the same symbol. */
L_underflow_to_zero:
	pushl	%eax			/* Save the ms word */
	call	_set_precision_flag_down
	popl	%eax
	pushl	%eax			/* Save the ms word */
	pushl	EX_Underflow
	call	_exception
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms word */
	movb	TW_Zero,TAG(%edi)
	jmp	xL_Store_significand
/* The operations resulted in a number too large to represent.
// Reached from xL_no_precision_loss when EXP(%edi) >= EXP_OVER.  The
// significand in %eax:%ebx is not stored on this path.
// %edi is pushed before the call, presumably as the argument to
// _arith_overflow (verify against its C prototype), and popped after. */
L_overflow:
	pushl	%edi
	call	_arith_overflow
	popl	%edi
	jmp	FPU_Arith_exit
/* Unmasked underflow: reached from xNormalise_result when FPU_denormal is
// UNMASKED_UNDERFLOW.  Raises EX_Underflow, preserving %eax around the
// call, then continues with the normal precision/overflow checks. */
xSignal_underflow:
	pushl	%eax			/* Save the ms word */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms word */
	jmp	xL_Normalised
#ifdef PARANOID
/* If we ever get here then we have problems!
// Each handler raises an internal-error exception with its own code, pops
// the argument off the stack again (into %ebx, which FPU_Arith_exit then
// reloads from the saved copy) and leaves without storing a result. */
L_bugged:			/* Unrecognised control word field */
	pushl	EX_INTERNAL|0x201
	call	EXCEPTION
	popl	%ebx
	jmp	FPU_Arith_exit
L_norm_bugged:			/* Exponent check in xNormalise_result failed */
	pushl	EX_INTERNAL|0x216
	call	EXCEPTION
	popl	%ebx
	jmp	FPU_Arith_exit
L_entry_bugged:			/* Only referenced by the disabled entry check */
	pushl	EX_INTERNAL|0x217
	call	EXCEPTION
	popl	%ebx
	jmp	FPU_Arith_exit
#endif /* PARANOID */