Backport Intel Core 2 and AMD Geode CPU types from gcc-4.3 (GPLv2)
These options are supported in this shape in all newer GCC versions. PR: gnu/155308 Obtained from: gcc 4.3 (rev. 118090, 118973, 120846; GPLv2) MFC after: 2 weeks
This commit is contained in:
parent
369c5b0707
commit
6be340ca74
@ -1207,14 +1207,14 @@ i[34567]86-*-solaris2*)
|
||||
# FIXME: -m64 for i[34567]86-*-* should be allowed just
|
||||
# like -m32 for x86_64-*-*.
|
||||
case X"${with_cpu}" in
|
||||
Xgeneric|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||||
Xgeneric|Xcore2|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||||
;;
|
||||
X)
|
||||
with_cpu=generic
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||||
echo "generic nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2
|
||||
echo "generic core2 nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@ -2537,6 +2537,9 @@ if test x$with_cpu = x ; then
|
||||
nocona-*)
|
||||
with_cpu=nocona
|
||||
;;
|
||||
core2-*)
|
||||
with_cpu=core2
|
||||
;;
|
||||
pentium_m-*)
|
||||
with_cpu=pentium-m
|
||||
;;
|
||||
@ -2556,6 +2559,9 @@ if test x$with_cpu = x ; then
|
||||
nocona-*)
|
||||
with_cpu=nocona
|
||||
;;
|
||||
core2-*)
|
||||
with_cpu=core2
|
||||
;;
|
||||
*)
|
||||
with_cpu=generic
|
||||
;;
|
||||
@ -2787,7 +2793,7 @@ case "${target}" in
|
||||
esac
|
||||
# OK
|
||||
;;
|
||||
"" | k8 | opteron | athlon64 | athlon-fx | nocona | generic)
|
||||
"" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
|
153
contrib/gcc/config/i386/geode.md
Normal file
153
contrib/gcc/config/i386/geode.md
Normal file
@ -0,0 +1,153 @@
|
||||
;; Geode Scheduling
|
||||
;; Copyright (C) 2006
|
||||
;; Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
;; GCC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GCC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||||
;; Boston, MA 02110-1301, USA.
|
||||
;;
|
||||
;; The Geode architecture is a single-issue processor.
|
||||
;;
|
||||
;; This description is based on data from the following documents:
|
||||
;;
|
||||
;; "AMD Geode GX Processor Data Book"
|
||||
;; Advanced Micro Devices, Inc., Aug 2005.
|
||||
;;
|
||||
;; "AMD Geode LX Processor Data Book"
|
||||
;; Advanced Micro Devices, Inc., Jan 2006.
|
||||
;;
|
||||
;;
|
||||
;; CPU execution units of the Geode:
|
||||
;;
|
||||
;; issue describes the issue pipeline.
|
||||
;; alu describes the Integer unit
|
||||
;; fpu describes the FP unit
|
||||
;;
|
||||
;; The fp unit is an out-of-order execution unit with register renaming.
|
||||
;; There is also a memory management unit and an execution pipeline for
|
||||
;; load/store operations. We ignore them, as well as the difference
|
||||
;; between insns using memory and insns using registers.
|
||||
|
||||
(define_automaton "geode")
|
||||
|
||||
(define_cpu_unit "geode_issue,geode_alu,geode_fpu" "geode")
|
||||
|
||||
(define_insn_reservation "alu" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "alu,alu1,negnot,icmp,lea,test,imov,imovx,icmov,incdec,setcc"))
|
||||
"geode_issue,geode_alu")
|
||||
|
||||
(define_insn_reservation "shift" 2
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "ishift,ishift1,rotate,rotate1,cld"))
|
||||
"geode_issue,geode_alu*2")
|
||||
|
||||
(define_insn_reservation "imul" 7
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "imul"))
|
||||
"geode_issue,geode_alu*7")
|
||||
|
||||
(define_insn_reservation "idiv" 40
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "idiv"))
|
||||
"geode_issue,geode_alu*40")
|
||||
|
||||
;; The branch unit.
|
||||
(define_insn_reservation "call" 2
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"geode_issue,geode_alu*2")
|
||||
|
||||
(define_insn_reservation "geode_branch" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "ibr"))
|
||||
"geode_issue,geode_alu")
|
||||
|
||||
(define_insn_reservation "geode_pop_push" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "pop,push"))
|
||||
"geode_issue,geode_alu")
|
||||
|
||||
(define_insn_reservation "geode_leave" 2
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "leave"))
|
||||
"geode_issue,geode_alu*2")
|
||||
|
||||
(define_insn_reservation "geode_load_str" 4
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"geode_issue,geode_alu*4")
|
||||
|
||||
(define_insn_reservation "geode_store_str" 2
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "store")))
|
||||
"geode_issue,geode_alu*2")
|
||||
|
||||
;; Be optimistic
|
||||
(define_insn_reservation "geode_unknown" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "multi,other"))
|
||||
"geode_issue,geode_alu")
|
||||
|
||||
;; FPU
|
||||
|
||||
(define_insn_reservation "geode_fop" 6
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fop,fcmp"))
|
||||
"geode_issue,geode_fpu*6")
|
||||
|
||||
(define_insn_reservation "geode_fsimple" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fmov,fcmov,fsgn,fxch"))
|
||||
"geode_issue,geode_fpu")
|
||||
|
||||
(define_insn_reservation "geode_fist" 4
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fistp,fisttp"))
|
||||
"geode_issue,geode_fpu*4")
|
||||
|
||||
(define_insn_reservation "geode_fmul" 10
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fmul"))
|
||||
"geode_issue,geode_fpu*10")
|
||||
|
||||
(define_insn_reservation "geode_fdiv" 47
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"geode_issue,geode_fpu*47")
|
||||
|
||||
;; We use minimal latency (fsin) here
|
||||
(define_insn_reservation "geode_fpspc" 54
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "fpspc"))
|
||||
"geode_issue,geode_fpu*54")
|
||||
|
||||
(define_insn_reservation "geode_frndint" 12
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "frndint"))
|
||||
"geode_issue,geode_fpu*12")
|
||||
|
||||
(define_insn_reservation "geode_mmxmov" 1
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "mmxmov"))
|
||||
"geode_issue,geode_fpu")
|
||||
|
||||
(define_insn_reservation "geode_mmx" 2
|
||||
(and (eq_attr "cpu" "geode")
|
||||
(eq_attr "type" "mmx,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"))
|
||||
"geode_issue,geode_fpu*2")
|
@ -335,6 +335,60 @@ struct processor_costs pentiumpro_cost = {
|
||||
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs geode_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
COSTS_N_INSNS (1), /* cost of a lea instruction */
|
||||
COSTS_N_INSNS (2), /* variable shift costs */
|
||||
COSTS_N_INSNS (1), /* constant shift costs */
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||||
COSTS_N_INSNS (4), /* HI */
|
||||
COSTS_N_INSNS (7), /* SI */
|
||||
COSTS_N_INSNS (7), /* DI */
|
||||
COSTS_N_INSNS (7)}, /* other */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
|
||||
COSTS_N_INSNS (23), /* HI */
|
||||
COSTS_N_INSNS (39), /* SI */
|
||||
COSTS_N_INSNS (39), /* DI */
|
||||
COSTS_N_INSNS (39)}, /* other */
|
||||
COSTS_N_INSNS (1), /* cost of movsx */
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
4, /* MOVE_RATIO */
|
||||
1, /* cost for loading QImode using movzbl */
|
||||
{1, 1, 1}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{1, 1, 1}, /* cost of storing integer registers */
|
||||
1, /* cost of reg,reg fld/fst */
|
||||
{1, 1, 1}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 6, 6}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
|
||||
1, /* cost of moving MMX register */
|
||||
{1, 1}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{1, 1}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
1, /* cost of moving SSE register */
|
||||
{1, 1, 1}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{1, 1, 1}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
1, /* MMX or SSE register to integer */
|
||||
32, /* size of prefetch block */
|
||||
1, /* number of parallel prefetches */
|
||||
1, /* Branch cost */
|
||||
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (11), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (47), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs k6_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
@ -600,6 +654,58 @@ struct processor_costs nocona_cost = {
|
||||
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs core2_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
|
||||
COSTS_N_INSNS (1), /* variable shift costs */
|
||||
COSTS_N_INSNS (1), /* constant shift costs */
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||||
COSTS_N_INSNS (3), /* HI */
|
||||
COSTS_N_INSNS (3), /* SI */
|
||||
COSTS_N_INSNS (3), /* DI */
|
||||
COSTS_N_INSNS (3)}, /* other */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
|
||||
COSTS_N_INSNS (22), /* HI */
|
||||
COSTS_N_INSNS (22), /* SI */
|
||||
COSTS_N_INSNS (22), /* DI */
|
||||
COSTS_N_INSNS (22)}, /* other */
|
||||
COSTS_N_INSNS (1), /* cost of movsx */
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
16, /* MOVE_RATIO */
|
||||
2, /* cost for loading QImode using movzbl */
|
||||
{6, 6, 6}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{4, 4, 4}, /* cost of storing integer registers */
|
||||
2, /* cost of reg,reg fld/fst */
|
||||
{6, 6, 6}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{6, 6}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{6, 6, 6}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 4}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
2, /* MMX or SSE register to integer */
|
||||
128, /* size of prefetch block */
|
||||
8, /* number of parallel prefetches */
|
||||
3, /* Branch cost */
|
||||
COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (5), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (32), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
/* Generic64 should produce code tuned for Nocona and K8. */
|
||||
static const
|
||||
struct processor_costs generic64_cost = {
|
||||
@ -721,38 +827,41 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
#define m_486 (1<<PROCESSOR_I486)
|
||||
#define m_PENT (1<<PROCESSOR_PENTIUM)
|
||||
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
|
||||
#define m_GEODE (1<<PROCESSOR_GEODE)
|
||||
#define m_K6_GEODE (m_K6 | m_GEODE)
|
||||
#define m_K6 (1<<PROCESSOR_K6)
|
||||
#define m_ATHLON (1<<PROCESSOR_ATHLON)
|
||||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
#define m_CORE2 (1<<PROCESSOR_CORE2)
|
||||
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
|
||||
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
|
||||
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
|
||||
|
||||
/* Generic instruction choice should be common subset of supported CPUs
|
||||
(PPro/PENT4/NOCONA/Athlon/K8). */
|
||||
(PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
|
||||
|
||||
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
|
||||
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
|
||||
generic because it is not working well with PPro base chips. */
|
||||
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
|
||||
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
|
||||
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
|
||||
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
|
||||
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_3dnow_a = m_ATHLON_K8;
|
||||
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
/* Branch hints were put in P4 based on simulation result. But
|
||||
after P4 was made, no performance benefit was observed with
|
||||
branch hints. It also increases the code size. As the result,
|
||||
icc never generates branch hints. */
|
||||
const int x86_branch_hints = 0;
|
||||
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
|
||||
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
|
||||
/* We probably ought to watch for partial register stalls on Generic32
|
||||
compilation setting as well. However in current implementation the
|
||||
partial register stalls are not eliminated very well - they can
|
||||
@ -762,15 +871,15 @@ const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_G
|
||||
with partial reg. dependencies used by Athlon/P4 based chips, it is better
|
||||
to leave it off for generic32 for now. */
|
||||
const int x86_partial_reg_stall = m_PPRO;
|
||||
const int x86_partial_flag_reg_stall = m_GENERIC;
|
||||
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
|
||||
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
|
||||
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
|
||||
const int x86_use_mov0 = m_K6;
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
|
||||
const int x86_read_modify_write = ~m_PENT;
|
||||
const int x86_read_modify = ~(m_PENT | m_PPRO);
|
||||
const int x86_split_long_moves = m_PPRO;
|
||||
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
|
||||
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
|
||||
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
|
||||
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
|
||||
const int x86_qimode_math = ~(0);
|
||||
@ -780,18 +889,18 @@ const int x86_promote_qi_regs = 0;
|
||||
if our scheme for avoiding partial stalls was more effective. */
|
||||
const int x86_himode_math = ~(m_PPRO);
|
||||
const int x86_promote_hi_regs = m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_shift1 = ~m_486;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
/* In Generic model we have a conflict here in between PPro/Pentium4 based chips
|
||||
that treat 128bit SSE registers as single units versus K8 based chips that
|
||||
divide SSE registers to two 64bit halves.
|
||||
@ -801,7 +910,7 @@ const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PEN
|
||||
this option on P4 brings over 20% SPECfp regression, while enabling it on
|
||||
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
|
||||
of moves. */
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
/* Set for machines where the type and dependencies are resolved on SSE
|
||||
register parts instead of whole registers, so we may maintain just
|
||||
lower part of scalar values in proper format leaving the upper part
|
||||
@ -810,18 +919,18 @@ const int x86_sse_split_regs = m_ATHLON_K8;
|
||||
const int x86_sse_typeless_stores = m_ATHLON_K8;
|
||||
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_use_ffreep = m_ATHLON_K8;
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
|
||||
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
|
||||
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
|
||||
|
||||
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
|
||||
integer data in xmm registers. Which results in pretty abysmal code. */
|
||||
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
|
||||
|
||||
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
|
||||
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
|
||||
/* Some CPU cores are not able to predict more than 4 branch instructions in
|
||||
the 16 byte window. */
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
|
||||
const int x86_use_bt = m_ATHLON_K8;
|
||||
/* Compare and exchange was added for 80486. */
|
||||
const int x86_cmpxchg = ~m_386;
|
||||
@ -831,7 +940,7 @@ const int x86_cmpxchg8b = ~(m_386 | m_486);
|
||||
const int x86_cmpxchg16b = m_NOCONA;
|
||||
/* Exchange and add was added for 80486. */
|
||||
const int x86_xadd = ~m_386;
|
||||
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
|
||||
const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
|
||||
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
@ -1455,11 +1564,13 @@ override_options (void)
|
||||
{&i486_cost, 0, 0, 16, 15, 16, 15, 16},
|
||||
{&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
|
||||
{&geode_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&k6_cost, 0, 0, 32, 7, 32, 7, 32},
|
||||
{&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&k8_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&core2_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
|
||||
};
|
||||
@ -1506,6 +1617,11 @@ override_options (void)
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_64BIT | PTA_MMX
|
||||
| PTA_PREFETCH_SSE},
|
||||
{"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
|
||||
| PTA_3DNOW_A},
|
||||
{"k6", PROCESSOR_K6, PTA_MMX},
|
||||
{"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
|
||||
{"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
|
||||
@ -13706,6 +13822,9 @@ ix86_issue_rate (void)
|
||||
case PROCESSOR_GENERIC64:
|
||||
return 3;
|
||||
|
||||
case PROCESSOR_CORE2:
|
||||
return 4;
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
|
@ -130,12 +130,14 @@ extern const struct processor_costs *ix86_cost;
|
||||
#define TARGET_486 (ix86_tune == PROCESSOR_I486)
|
||||
#define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM)
|
||||
#define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO)
|
||||
#define TARGET_GEODE (ix86_tune == PROCESSOR_GEODE)
|
||||
#define TARGET_K6 (ix86_tune == PROCESSOR_K6)
|
||||
#define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON)
|
||||
#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
|
||||
#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
|
||||
#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
|
||||
#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
|
||||
#define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2)
|
||||
#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
|
||||
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
|
||||
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
|
||||
@ -376,6 +378,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
else if (TARGET_GEODE) \
|
||||
{ \
|
||||
builtin_define ("__tune_geode__"); \
|
||||
} \
|
||||
else if (TARGET_K6) \
|
||||
{ \
|
||||
builtin_define ("__tune_k6__"); \
|
||||
@ -397,6 +403,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
builtin_define ("__tune_pentium4__"); \
|
||||
else if (TARGET_NOCONA) \
|
||||
builtin_define ("__tune_nocona__"); \
|
||||
else if (TARGET_CORE2) \
|
||||
builtin_define ("__tune_core2__"); \
|
||||
\
|
||||
if (TARGET_MMX) \
|
||||
builtin_define ("__MMX__"); \
|
||||
@ -437,6 +445,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
builtin_define ("__pentiumpro"); \
|
||||
builtin_define ("__pentiumpro__"); \
|
||||
} \
|
||||
else if (ix86_arch == PROCESSOR_GEODE) \
|
||||
{ \
|
||||
builtin_define ("__geode"); \
|
||||
builtin_define ("__geode__"); \
|
||||
} \
|
||||
else if (ix86_arch == PROCESSOR_K6) \
|
||||
{ \
|
||||
\
|
||||
@ -470,6 +483,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
builtin_define ("__nocona"); \
|
||||
builtin_define ("__nocona__"); \
|
||||
} \
|
||||
else if (ix86_arch == PROCESSOR_CORE2) \
|
||||
{ \
|
||||
builtin_define ("__core2"); \
|
||||
builtin_define ("__core2__"); \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
@ -481,23 +499,25 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
#define TARGET_CPU_DEFAULT_pentium2 5
|
||||
#define TARGET_CPU_DEFAULT_pentium3 6
|
||||
#define TARGET_CPU_DEFAULT_pentium4 7
|
||||
#define TARGET_CPU_DEFAULT_k6 8
|
||||
#define TARGET_CPU_DEFAULT_k6_2 9
|
||||
#define TARGET_CPU_DEFAULT_k6_3 10
|
||||
#define TARGET_CPU_DEFAULT_athlon 11
|
||||
#define TARGET_CPU_DEFAULT_athlon_sse 12
|
||||
#define TARGET_CPU_DEFAULT_k8 13
|
||||
#define TARGET_CPU_DEFAULT_pentium_m 14
|
||||
#define TARGET_CPU_DEFAULT_prescott 15
|
||||
#define TARGET_CPU_DEFAULT_nocona 16
|
||||
#define TARGET_CPU_DEFAULT_generic 17
|
||||
#define TARGET_CPU_DEFAULT_geode 8
|
||||
#define TARGET_CPU_DEFAULT_k6 9
|
||||
#define TARGET_CPU_DEFAULT_k6_2 10
|
||||
#define TARGET_CPU_DEFAULT_k6_3 11
|
||||
#define TARGET_CPU_DEFAULT_athlon 12
|
||||
#define TARGET_CPU_DEFAULT_athlon_sse 13
|
||||
#define TARGET_CPU_DEFAULT_k8 14
|
||||
#define TARGET_CPU_DEFAULT_pentium_m 15
|
||||
#define TARGET_CPU_DEFAULT_prescott 16
|
||||
#define TARGET_CPU_DEFAULT_nocona 17
|
||||
#define TARGET_CPU_DEFAULT_core2 18
|
||||
#define TARGET_CPU_DEFAULT_generic 19
|
||||
|
||||
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
|
||||
"pentiumpro", "pentium2", "pentium3", \
|
||||
"pentium4", "k6", "k6-2", "k6-3",\
|
||||
"pentium4", "geode", "k6", "k6-2", "k6-3", \
|
||||
"athlon", "athlon-4", "k8", \
|
||||
"pentium-m", "prescott", "nocona", \
|
||||
"generic"}
|
||||
"core2", "generic"}
|
||||
|
||||
#ifndef CC1_SPEC
|
||||
#define CC1_SPEC "%(cc1_cpu) "
|
||||
@ -2077,11 +2097,13 @@ enum processor_type
|
||||
PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
|
||||
PROCESSOR_PENTIUM,
|
||||
PROCESSOR_PENTIUMPRO,
|
||||
PROCESSOR_GEODE,
|
||||
PROCESSOR_K6,
|
||||
PROCESSOR_ATHLON,
|
||||
PROCESSOR_PENTIUM4,
|
||||
PROCESSOR_K8,
|
||||
PROCESSOR_NOCONA,
|
||||
PROCESSOR_CORE2,
|
||||
PROCESSOR_GENERIC32,
|
||||
PROCESSOR_GENERIC64,
|
||||
PROCESSOR_max
|
||||
|
@ -187,7 +187,7 @@
|
||||
|
||||
;; Processor type. This attribute must exactly match the processor_type
|
||||
;; enumeration in i386.h.
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64"
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
|
||||
(const (symbol_ref "ix86_tune")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
@ -473,6 +473,7 @@
|
||||
(include "ppro.md")
|
||||
(include "k6.md")
|
||||
(include "athlon.md")
|
||||
(include "geode.md")
|
||||
|
||||
|
||||
;; Operand and operator predicates and constraints
|
||||
|
@ -129,7 +129,7 @@
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "GCC 1"
|
||||
.TH GCC 1 "2011-02-20" "gcc-4.2.1" "GNU"
|
||||
.TH GCC 1 "2011-03-07" "gcc-4.2.1" "GNU"
|
||||
.SH "NAME"
|
||||
gcc \- GNU project C and C++ compiler
|
||||
.SH "SYNOPSIS"
|
||||
@ -8733,6 +8733,10 @@ set support.
|
||||
.IX Item "nocona"
|
||||
Improved version of Intel Pentium4 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0,
|
||||
\&\s-1SSE2\s0 and \s-1SSE3\s0 instruction set support.
|
||||
.IP "\fIcore2\fR" 4
|
||||
.IX Item "core2"
|
||||
Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0 and \s-1SSE3\s0
|
||||
instruction set support.
|
||||
.IP "\fIk6\fR" 4
|
||||
.IX Item "k6"
|
||||
\&\s-1AMD\s0 K6 \s-1CPU\s0 with \s-1MMX\s0 instruction set support.
|
||||
@ -8770,6 +8774,9 @@ implemented for this chip.)
|
||||
.IX Item "c3-2"
|
||||
Via C3\-2 \s-1CPU\s0 with \s-1MMX\s0 and \s-1SSE\s0 instruction set support. (No scheduling is
|
||||
implemented for this chip.)
|
||||
.IP "\fIgeode\fR" 4
|
||||
.IX Item "geode"
|
||||
Embedded AMD \s-1CPU\s0 with \s-1MMX\s0 and 3dNOW! instruction set support.
|
||||
.RE
|
||||
.RS 4
|
||||
.Sp
|
||||
|
@ -9369,6 +9369,9 @@ set support.
|
||||
@item nocona
|
||||
Improved version of Intel Pentium4 CPU with 64-bit extensions, MMX, SSE,
|
||||
SSE2 and SSE3 instruction set support.
|
||||
@item core2
|
||||
Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3
|
||||
instruction set support.
|
||||
@item k6
|
||||
AMD K6 CPU with MMX instruction set support.
|
||||
@item k6-2, k6-3
|
||||
@ -9396,6 +9399,8 @@ implemented for this chip.)
|
||||
@item c3-2
|
||||
Via C3-2 CPU with MMX and SSE instruction set support. (No scheduling is
|
||||
implemented for this chip.)
|
||||
@item geode
|
||||
Embedded AMD CPU with MMX and 3dNOW! instruction set support.
|
||||
@end table
|
||||
|
||||
While picking a specific @var{cpu-type} will schedule things appropriately
|
||||
|
Loading…
x
Reference in New Issue
Block a user