207 lines
6.6 KiB
Markdown
207 lines
6.6 KiB
Markdown
|
;; AMD Athlon Scheduling
|
||
|
;; Copyright (C) 2002 Free Software Foundation, Inc.
|
||
|
;;
|
||
|
;; This file is part of GNU CC.
|
||
|
;;
|
||
|
;; GNU CC is free software; you can redistribute it and/or modify
|
||
|
;; it under the terms of the GNU General Public License as published by
|
||
|
;; the Free Software Foundation; either version 2, or (at your option)
|
||
|
;; any later version.
|
||
|
;;
|
||
|
;; GNU CC is distributed in the hope that it will be useful,
|
||
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
;; GNU General Public License for more details.
|
||
|
;;
|
||
|
;; You should have received a copy of the GNU General Public License
|
||
|
;; along with GNU CC; see the file COPYING. If not, write to
|
||
|
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||
|
;; Boston, MA 02111-1307, USA.
|
||
|
;; Decoder class of an insn: "direct" or "vector".  The clauses of the
;; cond are tried in order and the first one that matches supplies the
;; value; an insn matching none of them is "direct".
(define_attr "athlon_decode" "direct,vector"
  ;; Clause 1: insn types that are "vector" regardless of operands.
  ;; (The list used to name "fcmov" twice; the redundant entry is dropped.)
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
           (const_string "vector")
         ;; Clause 2: a push whose operand 1 is a memory operand.
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand" ""))
           (const_string "vector")
         ;; Clause 3: an XFmode fmov that loads from or stores to memory.
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        ;; Default for everything else.
        (const_string "direct")))
|
||
|
|
||
|
;; The Athlon does contain three pipelined FP units, three integer units and
|
||
|
;; three address generation units.
|
||
|
;;
|
||
|
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of the K6 might be
;; an issue here as well, but it is not noted in the documentation.
|
||
|
;;
|
||
|
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72-entry instruction
;; control unit, which passes them to the specialized integer (18-entry)
;; and fp (36-entry) schedulers.
|
||
|
;;
|
||
|
;; The load/store queue unit is not attached to the schedulers but
|
||
|
;; communicates with all the execution units separately instead.
|
||
|
|
||
|
;; VectorPath decoder: one unit, used by every Athlon insn whose
;; athlon_decode attribute is "vector".
(define_function_unit "athlon_vectordec"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "vector"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; DirectPath decoders: three identical units, used by every Athlon insn
;; whose athlon_decode attribute is "direct".
(define_function_unit "athlon_directdec"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; "direct" insns are also entered on the VectorPath decoder unit.  The
;; trailing conflict list limits the issue-delay of 1 to a following insn
;; whose athlon_decode is "vector"; insns not named in a conflict list
;; incur no conflict cost on the unit.
(define_function_unit "athlon_vectordec"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1                                   ; ready-delay, issue-delay
  [(eq_attr "athlon_decode" "vector")]) ; conflict list
|
||
|
|
||
|
;; Integer execution units: three identical units.  This entry covers the
;; integer insn types modeled with a ready-delay and issue-delay of 1.
;; (The type list used to name "pop" twice; the redundant entry is dropped.)
(define_function_unit "athlon_ieu"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Insns of type "str" on the integer units: the result is ready after 15
;; cycles and the unit is held for 15 cycles.
(define_function_unit "athlon_ieu"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "str"))
  15 15)                                ; ready-delay, issue-delay
|
||
|
|
||
|
;; Integer multiply on the integer units: ready-delay of 5 cycles.  The
;; issue-delay of 0 means the insn adds no conflict cost on this unit.
(define_function_unit "athlon_ieu"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Integer divide on the integer units: ready-delay of 42 cycles.  The
;; issue-delay of 0 means the insn adds no conflict cost on this unit.
(define_function_unit "athlon_ieu"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 0)                                 ; ready-delay, issue-delay
|
||
|
|
||
|
;; Multiply/divide unit: a single unit.  Integer multiply has a ready-delay
;; of 5 cycles and an issue-delay of 0 on it.
(define_function_unit "athlon_muldiv"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Integer divide on the single multiply/divide unit: the result is ready
;; after 42 cycles and the unit is held for the same 42 cycles.
(define_function_unit "athlon_muldiv"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 42)                                ; ready-delay, issue-delay
|
||
|
|
||
|
;; Classifies an insn by the FP unit class it is assigned to.  The clauses
;; of the cond are tried in order, so the later fmov clauses only see fmov
;; insns that the earlier ones did not match.  Insns matching no clause
;; get "none".
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
  (cond [(eq_attr "type" "fop,fcmp,fistp")
           (const_string "add")
         (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
           (const_string "mul")
         ;; fmov whose "memory" attribute is store or both.
         (and (eq_attr "type" "fmov")
              (eq_attr "memory" "store,both"))
           (const_string "store")
         ;; fmov that loads from memory.
         (and (eq_attr "type" "fmov")
              (eq_attr "memory" "load"))
           (const_string "any")
         ;; fmov whose operand 1 is an SImode register or an immediate.
         (and (eq_attr "type" "fmov")
              (ior (match_operand:SI 1 "register_operand" "")
                   (match_operand 1 "immediate_operand" "")))
           (const_string "store")
         ;; Every remaining fmov.
         (eq_attr "type" "fmov")
           (const_string "muladd")]
        (const_string "none")))
|
||
|
|
||
|
;; We use a latency of 1 for these definitions.  This is sufficient to model
;; collisions in execution units.  The real latencies are modeled in the
;; "fp" pipeline.
|
||
|
|
||
|
;; fsin, fcos: 96-192
|
||
|
;; fsincos: 107-211
|
||
|
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
|
||
|
;; All "fpspc" insns are modeled on the three-unit FP pipeline with a
;; single ready-delay of 100 cycles and an issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fpspc"))
  100 1)                                ; ready-delay, issue-delay
|
||
|
|
||
|
;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
|
||
|
;; FP divide on the FP pipeline.  The test does not look at the mode, so
;; a ready-delay of 24 cycles is used for every fdiv.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  24 1)                                 ; ready-delay, issue-delay
|
||
|
|
||
|
;; Insns of type fop, fmul and fistp on the FP pipeline: ready-delay of 4
;; cycles, issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fop,fmul,fistp"))
  4 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; XFmode loads are slow.
|
||
|
;; XFmode store is slow too (8 cycles), but we don't need to model it, because
|
||
|
;; there are no dependent instructions.
|
||
|
|
||
|
;; An fmov that loads an XFmode value from memory: ready-delay of 10
;; cycles, issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  10 1)                                 ; ready-delay, issue-delay
|
||
|
|
||
|
;; Insns of type fmov and fsgn on the FP pipeline: ready-delay of 2
;; cycles, issue-delay of 1.  The test is on the type only, so it also
;; matches XFmode fmov loads.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fmov,fsgn"))
  2 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; fcmp and ftst instructions
|
||
|
;; fcmp insns whose athlon_decode attribute is "direct": ready-delay of 3
;; cycles, issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "athlon_decode" "direct")))
  3 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; fcmpi instructions.
|
||
|
;; fcmp insns whose athlon_decode attribute is "vector": ready-delay of 3
;; cycles, issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "athlon_decode" "vector")))
  3 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; FP conditional move on the FP pipeline: ready-delay of 7 cycles,
;; issue-delay of 1.
(define_function_unit "athlon_fp"
  3 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  7 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Single unit claimed by insns whose athlon_fpunits attribute is "mul".
(define_function_unit "athlon_fp_mul"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "mul"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Single unit claimed by insns whose athlon_fpunits attribute is "add".
(define_function_unit "athlon_fp_add"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "add"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Two units shared by insns whose athlon_fpunits attribute is "muladd",
;; "mul" or "add".
(define_function_unit "athlon_fp_muladd"
  2 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "muladd,mul,add"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; Single unit claimed by insns whose athlon_fpunits attribute is "store".
(define_function_unit "athlon_fp_store"
  1 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "store"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|
||
|
;; We don't need to model the Address Generation Unit, since we don't model
;; the re-order buffer yet and thus we never schedule more than three
;; operations at a time.  Later we may want to experiment with MD_SCHED
;; macros modeling the decoders independently of the functional units.
|
||
|
|
||
|
;(define_function_unit "athlon_agu" 3 0
|
||
|
; (and (eq_attr "cpu" "athlon")
|
||
|
; (and (eq_attr "memory" "!none")
|
||
|
; (eq_attr "athlon_fpunits" "none")))
|
||
|
; 1 1)
|
||
|
|
||
|
;; Model the load unit to avoid overly long sequences of loads.  We don't
;; need to model the store queue, since it is unlikely to be a bottleneck.
|
||
|
|
||
|
;; Load unit: two identical units, claimed by every Athlon insn whose
;; "memory" attribute is load or both.
(define_function_unit "athlon_load"
  2 0                                   ; multiplicity, simultaneity
  (and (eq_attr "cpu" "athlon")
       (eq_attr "memory" "load,both"))
  1 1)                                  ; ready-delay, issue-delay
|
||
|
|