This commit was generated by cvs2svn to compensate for changes in r34204,

which included commits to RCS files with non-trunk default branches.
This commit is contained in:
John Birrell 1998-03-07 21:27:49 +00:00
commit 051b1b1a74
9 changed files with 903 additions and 0 deletions

View File

@ -0,0 +1,120 @@
# Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
# store sum in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
.globl __mpn_add_n
.ent __mpn_add_n
__mpn_add_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
.L0: beq $19,.Lend
.align 3
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
addq $5,$6,$6
cmpult $6,$5,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
addq $3,$4,$4
cmpult $4,$3,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end __mpn_add_n

View File

@ -0,0 +1,92 @@
# Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
# the result to a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
.text
.align 3
.globl __mpn_addmul_1
.ent __mpn_addmul_1 2
__mpn_addmul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
addq $5,$3,$3
cmpult $3,$5,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,.Lend2 # jump if size was == 2
.align 3
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,.Loop
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
.Lend1: addq $5,$3,$3
cmpult $3,$5,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end __mpn_addmul_1

View File

@ -0,0 +1,27 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
License for more details.
You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
#define BITS_PER_MP_LIMB 64
#define BYTES_PER_MP_LIMB 8
#define BITS_PER_LONGINT 64
#define BITS_PER_INT 32
#define BITS_PER_SHORTINT 16
#define BITS_PER_CHAR 8

View File

@ -0,0 +1,109 @@
# Alpha 21064 __mpn_lshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
# it would take 4 cycles/limb. It should be possible to get down to 3
# cycles/limb since both ldq and stq can be paired with the other used
# instructions. But there are many restrictions in the 21064 pipeline that
# makes it hard, if not impossible, to get down to 3 cycles/limb:
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
.set noreorder
.set noat
.text
.align 3
.globl __mpn_lshift
.ent __mpn_lshift
__mpn_lshift:
.frame $30,0,$26,0
s8addq $18,$17,$17 # make r17 point at end of s1
ldq $4,-8($17) # load first limb
subq $17,8,$17
subq $31,$19,$7
s8addq $18,$16,$16 # make r16 point at end of RES
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
srl $4,$7,$0 # compute function result
beq $20,.L0
subq $18,$20,$18
.align 3
.Loop0:
ldq $3,-8($17)
subq $16,8,$16
subq $17,8,$17
subq $20,1,$20
sll $4,$19,$5
srl $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,0($16)
bne $20,.Loop0
.L0: beq $18,.Lend
.align 3
.Loop: ldq $3,-8($17)
subq $16,32,$16
subq $18,4,$18
sll $4,$19,$5
srl $3,$7,$6
ldq $4,-16($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,24($16)
srl $4,$7,$2
ldq $3,-24($17)
sll $4,$19,$5
bis $1,$2,$8
stq $8,16($16)
srl $3,$7,$6
ldq $4,-32($17)
sll $3,$19,$1
bis $5,$6,$8
stq $8,8($16)
srl $4,$7,$2
subq $17,32,$17
bis $1,$2,$8
stq $8,0($16)
bgt $18,.Loop
.Lend: sll $4,$19,$8
stq $8,-8($16)
ret $31,($26),1
.end __mpn_lshift

View File

@ -0,0 +1,85 @@
# Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
# the result in a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
# To improve performance for long multiplications, we would use
# 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
# these instructions without slowing down the general code: 1. We can
# only have two prefetches in operation at any time in the Alpha
# architecture. 2. There will seldom be any special alignment
# between RES_PTR and S1_PTR. Maybe we can simply divide the current
# loop into an inner and outer loop, having the inner loop handle
# exactly one prefetch block?
.set noreorder
.set noat
.text
.align 3
.globl __mpn_mul_1
.ent __mpn_mul_1 2
__mpn_mul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
bic $31,$31,$4 # clear cy_limb
umulh $2,$19,$0 # $0 = prod_high
beq $18,Lend1 # jump if size was == 1
ldq $2,8($17) # $2 = s1_limb
subq $18,1,$18 # size--
stq $3,0($16)
beq $18,Lend2 # jump if size was == 2
.align 3
Loop: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,16($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
stq $3,8($16)
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
addq $16,8,$16 # res_ptr++
bne $18,Loop
Lend2: mulq $2,$19,$3 # $3 = prod_low
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
stq $3,8($16)
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
Lend1: stq $3,0($16)
ret $31,($26),1
.end __mpn_mul_1

View File

@ -0,0 +1,107 @@
# Alpha 21064 __mpn_rshift --
# Copyright (C) 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# cnt r19
# This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
# it would take 4 cycles/limb. It should be possible to get down to 3
# cycles/limb since both ldq and stq can be paired with the other used
# instructions. But there are many restrictions in the 21064 pipeline that
# makes it hard, if not impossible, to get down to 3 cycles/limb:
# 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
# 2. Only aligned instruction pairs can be paired.
# 3. The store buffer or silo might not be able to deal with the bandwidth.
.set noreorder
.set noat
.text
.align 3
.globl __mpn_rshift
.ent __mpn_rshift
__mpn_rshift:
.frame $30,0,$26,0
ldq $4,0($17) # load first limb
addq $17,8,$17
subq $31,$19,$7
subq $18,1,$18
and $18,4-1,$20 # number of limbs in first loop
sll $4,$7,$0 # compute function result
beq $20,.L0
subq $18,$20,$18
.align 3
.Loop0:
ldq $3,0($17)
addq $16,8,$16
addq $17,8,$17
subq $20,1,$20
srl $4,$19,$5
sll $3,$7,$6
bis $3,$3,$4
bis $5,$6,$8
stq $8,-8($16)
bne $20,.Loop0
.L0: beq $18,.Lend
.align 3
.Loop: ldq $3,0($17)
addq $16,32,$16
subq $18,4,$18
srl $4,$19,$5
sll $3,$7,$6
ldq $4,8($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-32($16)
sll $4,$7,$2
ldq $3,16($17)
srl $4,$19,$5
bis $1,$2,$8
stq $8,-24($16)
sll $3,$7,$6
ldq $4,24($17)
srl $3,$19,$1
bis $5,$6,$8
stq $8,-16($16)
sll $4,$7,$2
addq $17,32,$17
bis $1,$2,$8
stq $8,-8($16)
bgt $18,.Loop
.Lend: srl $4,$19,$8
stq $8,0($16)
ret $31,($26),1
.end __mpn_rshift

View File

@ -0,0 +1,120 @@
# Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
# Copyright (C) 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr $16
# s1_ptr $17
# s2_ptr $18
# size $19
.set noreorder
.set noat
.text
.align 3
.globl __mpn_sub_n
.ent __mpn_sub_n
__mpn_sub_n:
.frame $30,0,$26,0
ldq $3,0($17)
ldq $4,0($18)
subq $19,1,$19
and $19,4-1,$2 # number of limbs in first loop
bis $31,$31,$0
beq $2,.L0 # if multiple of 4 limbs, skip first loop
subq $19,$2,$19
.Loop0: subq $2,1,$2
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
addq $17,8,$17
addq $18,8,$18
bis $5,$5,$3
bis $6,$6,$4
addq $16,8,$16
bne $2,.Loop0
.L0: beq $19,.Lend
.align 3
.Loop: subq $19,4,$19
ldq $5,8($17)
addq $4,$0,$4
ldq $6,8($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ldq $3,16($17)
addq $6,$0,$6
ldq $4,16($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,8($16)
or $0,$1,$0
ldq $5,24($17)
addq $4,$0,$4
ldq $6,24($18)
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,16($16)
or $0,$1,$0
ldq $3,32($17)
addq $6,$0,$6
ldq $4,32($18)
cmpult $6,$0,$1
subq $5,$6,$6
cmpult $5,$6,$0
stq $6,24($16)
or $0,$1,$0
addq $17,32,$17
addq $18,32,$18
addq $16,32,$16
bne $19,.Loop
.Lend: addq $4,$0,$4
cmpult $4,$0,$1
subq $3,$4,$4
cmpult $3,$4,$0
stq $4,0($16)
or $0,$1,$0
ret $31,($26),1
.end __mpn_sub_n

View File

@ -0,0 +1,92 @@
# Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
# subtract the result from a second limb vector.
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
# INPUT PARAMETERS
# res_ptr r16
# s1_ptr r17
# size r18
# s2_limb r19
# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
.set noreorder
.set noat
.text
.align 3
.globl __mpn_submul_1
.ent __mpn_submul_1 2
__mpn_submul_1:
.frame $30,0,$26
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
umulh $2,$19,$0 # $0 = prod_high
beq $18,.Lend1 # jump if size was == 1
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
subq $18,1,$18 # size--
subq $5,$3,$3
cmpult $5,$3,$4
stq $3,0($16)
addq $16,8,$16 # res_ptr++
beq $18,.Lend2 # jump if size was == 2
.align 3
.Loop: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
subq $18,1,$18 # size--
umulh $2,$19,$4 # $4 = cy_limb
ldq $2,0($17) # $2 = s1_limb
addq $17,8,$17 # s1_ptr++
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $16,8,$16 # res_ptr++
addq $5,$0,$0 # combine carries
bne $18,.Loop
.Lend2: mulq $2,$19,$3 # $3 = prod_low
ldq $5,0($16) # $5 = *res_ptr
addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
umulh $2,$19,$4 # $4 = cy_limb
addq $3,$0,$3 # $3 = cy_limb + prod_low
cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $5,$0,$0 # combine carries
addq $4,$0,$0 # cy_limb = prod_high + cy
ret $31,($26),1
.Lend1: subq $5,$3,$3
cmpult $5,$3,$5
stq $3,0($16)
addq $0,$5,$0
ret $31,($26),1
.end __mpn_submul_1

View File

@ -0,0 +1,151 @@
# Alpha 21064 __udiv_qrnnd
# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
# License for more details.
# You should have received a copy of the GNU Library General Public License
# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
.set noreorder
.set noat
.text
.align 3
.globl __udiv_qrnnd
.ent __udiv_qrnnd
__udiv_qrnnd:
.frame $30,0,$26,0
.prologue 0
#define cnt $2
#define tmp $3
#define rem_ptr $16
#define n1 $17
#define n0 $18
#define d $19
#define qb $20
ldiq cnt,16
blt d,.Largedivisor
.Loop1: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule d,n1,qb
subq n1,d,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop1
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.Largedivisor:
and n0,1,$4
srl n0,1,n0
sll n1,63,tmp
or tmp,n0,n0
srl n1,1,n1
and d,1,$6
srl d,1,$5
addq $5,$6,$5
.Loop2: cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
cmplt n0,0,tmp
addq n1,n1,n1
bis n1,tmp,n1
addq n0,n0,n0
cmpule $5,n1,qb
subq n1,$5,tmp
cmovne qb,tmp,n1
bis n0,qb,n0
subq cnt,1,cnt
bgt cnt,.Loop2
addq n1,n1,n1
addq $4,n1,n1
bne $6,.LOdd
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.LOdd:
/* q' in n0. r' in n1 */
addq n1,n0,n1
cmpult n1,n0,tmp # tmp := carry from addq
beq tmp,.LLp6
addq n0,1,n0
subq n1,d,n1
.LLp6: cmpult n1,d,tmp
bne tmp,.LLp7
addq n0,1,n0
subq n1,d,n1
.LLp7:
stq n1,0(rem_ptr)
bis $31,n0,$0
ret $31,($26),1
.end __udiv_qrnnd