Add experimental support for amdfam10/barcelona from the GCC 4.3 branch.
Initial support for the AMD Barcelona chipsets has been available in the
gcc43 branch under GPLv2, but it was not included when the Core 2 support
was brought into the system gcc.  AMD and some Linux distributions
(OpenSUSE) backported the amdfam10 support and made it available.
Unfortunately this support is still experimental: while it can improve
performance, enabling the CPUTYPE may break some C++ ports (such as clang).
Special care was taken to make sure that the patches predate the GPLv3
switch upstream.

Tested by:	Vladimir Kushnir
Reviewed by:	mm
Approved by:	jhb (mentor)
MFC after:	2 weeks
Parent: f584d74b4d
Commit: b28518a59a
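For FreeBSD users, the commit message above refers to the CPUTYPE knob in
make.conf.  A minimal sketch of how the new support would typically be
enabled, assuming the matching bsd.cpu.mk entries are present (hypothetical
example, not part of this commit, and subject to the C++ breakage noted
above):

	# /etc/make.conf
	CPUTYPE?=amdfam10	# or "barcelona"; maps to gcc -march=amdfam10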
@ -1,3 +1,8 @@
2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>

* doc/invoke.texi: Fix typo, 'AMD Family 10h core' instead of
'AMD Family 10 core'.

2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> (r124339)

* config/i386/i386.c (override_options): Accept k8-sse3, opteron-sse3
@ -5,10 +10,39 @@
with SSE3 instruction set support.
* doc/invoke.texi: Likewise.

2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>

* config/i386/i386.c (override_options): Tuning 32-byte loop
alignment for amdfam10 architecture. Increasing the max loop
alignment to 24 bytes.

2007-04-12 Richard Guenther <rguenther@suse.de>

PR tree-optimization/24689
PR tree-optimization/31307
* fold-const.c (operand_equal_p): Compare INTEGER_CST array
indices by value.
* gimplify.c (canonicalize_addr_expr): To be consistent with
gimplify_compound_lval only set operands two and three of
ARRAY_REFs if they are not gimple_min_invariant. This makes
it never at this place.
* tree-ssa-ccp.c (maybe_fold_offset_to_array_ref): Likewise.

2007-04-07 H.J. Lu <hongjiu.lu@intel.com> (r123639)

* config/i386/i386.c (ix86_handle_option): Handle SSSE3.

2007-03-28 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>

* config.gcc: Accept barcelona as a variant of amdfam10.
* config/i386/i386.c (override_options): Likewise.
* doc/invoke.texi: Likewise.

2007-02-09 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>

* config/i386/driver-i386.c: Turn on -mtune=native for AMDFAM10.
(bit_SSE4a): New.

2007-02-08 Harsha Jagasia <harsha.jagasia@amd.com> (r121726)

* config/i386/xmmintrin.h: Make inclusion of emmintrin.h
@ -26,6 +60,173 @@
|
||||
|
||||
* config/i386/i386.c (override_options): Set PTA_SSSE3 for core2.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (athlon_fldxf_k8, athlon_fld_k8,
|
||||
athlon_fstxf_k8, athlon_fst_k8, athlon_fist, athlon_fmov,
|
||||
athlon_fadd_load, athlon_fadd_load_k8, athlon_fadd, athlon_fmul,
|
||||
athlon_fmul_load, athlon_fmul_load_k8, athlon_fsgn,
|
||||
athlon_fdiv_load, athlon_fdiv_load_k8, athlon_fdiv_k8,
|
||||
athlon_fpspc_load, athlon_fpspc, athlon_fcmov_load,
|
||||
athlon_fcmov_load_k8, athlon_fcmov_k8, athlon_fcomi_load_k8,
|
||||
athlon_fcomi, athlon_fcom_load_k8, athlon_fcom): Added amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/i386.md (x86_sahf_1, cmpfp_i_mixed, cmpfp_i_sse,
|
||||
cmpfp_i_i387, cmpfp_iu_mixed, cmpfp_iu_sse, cmpfp_iu_387,
|
||||
swapsi, swaphi_1, swapqi_1, swapdi_rex64, fix_truncsfdi_sse,
|
||||
fix_truncdfdi_sse, fix_truncsfsi_sse, fix_truncdfsi_sse,
|
||||
x86_fldcw_1, floatsisf2_mixed, floatsisf2_sse, floatdisf2_mixed,
|
||||
floatdisf2_sse, floatsidf2_mixed, floatsidf2_sse,
|
||||
floatdidf2_mixed, floatdidf2_sse, muldi3_1_rex64, mulsi3_1,
|
||||
mulsi3_1_zext, mulhi3_1, mulqi3_1, umulqihi3_1, mulqihi3_insn,
|
||||
umulditi3_insn, umulsidi3_insn, mulditi3_insn, mulsidi3_insn,
|
||||
umuldi3_highpart_rex64, umulsi3_highpart_insn,
|
||||
umulsi3_highpart_zext, smuldi3_highpart_rex64,
|
||||
smulsi3_highpart_insn, smulsi3_highpart_zext, x86_64_shld,
|
||||
x86_shld_1, x86_64_shrd, sqrtsf2_mixed, sqrtsf2_sse,
|
||||
sqrtsf2_i387, sqrtdf2_mixed, sqrtdf2_sse, sqrtdf2_i387,
|
||||
sqrtextendsfdf2_i387, sqrtxf2, sqrtextendsfxf2_i387,
|
||||
sqrtextenddfxf2_i387): Added amdfam10_decode.
|
||||
|
||||
* config/i386/athlon.md (athlon_idirect_amdfam10,
|
||||
athlon_ivector_amdfam10, athlon_idirect_load_amdfam10,
|
||||
athlon_ivector_load_amdfam10, athlon_idirect_both_amdfam10,
|
||||
athlon_ivector_both_amdfam10, athlon_idirect_store_amdfam10,
|
||||
athlon_ivector_store_amdfam10): New define_insn_reservation.
|
||||
(athlon_idirect_loadmov, athlon_idirect_movstore): Added
|
||||
amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (athlon_call_amdfam10,
|
||||
athlon_pop_amdfam10, athlon_lea_amdfam10): New
|
||||
define_insn_reservation.
|
||||
(athlon_branch, athlon_push, athlon_leave_k8, athlon_imul_k8,
|
||||
athlon_imul_k8_DI, athlon_imul_mem_k8, athlon_imul_mem_k8_DI,
|
||||
athlon_idiv, athlon_idiv_mem, athlon_str): Added amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (athlon_sseld_amdfam10,
|
||||
athlon_mmxld_amdfam10, athlon_ssest_amdfam10,
|
||||
athlon_mmxssest_short_amdfam10): New define_insn_reservation.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (athlon_sseins_amdfam10): New
|
||||
define_insn_reservation.
|
||||
* config/i386/i386.md (sseins): Added sseins to define_attr type
|
||||
and define_attr unit.
|
||||
* config/i386/sse.md: Set type attribute to sseins for insertq
|
||||
and insertqi.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (sselog_load_amdfam10, sselog_amdfam10,
|
||||
ssecmpvector_load_amdfam10, ssecmpvector_amdfam10,
|
||||
ssecomi_load_amdfam10, ssecomi_amdfam10,
|
||||
sseaddvector_load_amdfam10, sseaddvector_amdfam10): New
|
||||
define_insn_reservation.
|
||||
(ssecmp_load_k8, ssecmp, sseadd_load_k8, seadd): Added amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (cvtss2sd_load_amdfam10,
|
||||
cvtss2sd_amdfam10, cvtps2pd_load_amdfam10, cvtps2pd_amdfam10,
|
||||
cvtsi2sd_load_amdfam10, cvtsi2ss_load_amdfam10,
|
||||
cvtsi2sd_amdfam10, cvtsi2ss_amdfam10, cvtsd2ss_load_amdfam10,
|
||||
cvtsd2ss_amdfam10, cvtpd2ps_load_amdfam10, cvtpd2ps_amdfam10,
|
||||
cvtsX2si_load_amdfam10, cvtsX2si_amdfam10): New
|
||||
define_insn_reservation.
|
||||
|
||||
* config/i386/sse.md (cvtsi2ss, cvtsi2ssq, cvtss2si,
|
||||
cvtss2siq, cvttss2si, cvttss2siq, cvtsi2sd, cvtsi2sdq,
|
||||
cvtsd2si, cvtsd2siq, cvttsd2si, cvttsd2siq,
|
||||
cvtpd2dq, cvttpd2dq, cvtsd2ss, cvtss2sd,
|
||||
cvtpd2ps, cvtps2pd): Added amdfam10_decode attribute.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/athlon.md (athlon_ssedivvector_amdfam10,
|
||||
athlon_ssedivvector_load_amdfam10, athlon_ssemulvector_amdfam10,
|
||||
athlon_ssemulvector_load_amdfam10): New define_insn_reservation.
|
||||
(athlon_ssediv, athlon_ssediv_load_k8, athlon_ssemul,
|
||||
athlon_ssemul_load_k8): Added amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/i386.h (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL): New macro.
|
||||
(x86_sse_unaligned_move_optimal): New variable.
|
||||
|
||||
* config/i386/i386.c (x86_sse_unaligned_move_optimal): Enable for
|
||||
m_AMDFAM10.
|
||||
(ix86_expand_vector_move_misalign): Add code to generate movupd/movups
|
||||
for unaligned vector SSE double/single precision loads for AMDFAM10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/i386.h (TARGET_AMDFAM10): New macro.
|
||||
(TARGET_CPU_CPP_BUILTINS): Add code for amdfam10.
|
||||
Define TARGET_CPU_DEFAULT_amdfam10.
|
||||
(TARGET_CPU_DEFAULT_NAMES): Add amdfam10.
|
||||
(processor_type): Add PROCESSOR_AMDFAM10.
|
||||
|
||||
* config/i386/i386.md: Add amdfam10 as a new cpu attribute to match
|
||||
processor_type in config/i386/i386.h.
|
||||
Enable imul peepholes for TARGET_AMDFAM10.
|
||||
|
||||
* config.gcc: Add support for --with-cpu option for amdfam10.
|
||||
|
||||
* config/i386/i386.c (amdfam10_cost): New variable.
|
||||
(m_AMDFAM10): New macro.
|
||||
(m_ATHLON_K8_AMDFAM10): New macro.
|
||||
(x86_use_leave, x86_push_memory, x86_movx, x86_unroll_strlen,
|
||||
x86_cmove, x86_3dnow_a, x86_deep_branch, x86_use_simode_fiop,
|
||||
x86_promote_QImode, x86_integer_DFmode_moves,
|
||||
x86_partial_reg_dependency, x86_memory_mismatch_stall,
|
||||
x86_accumulate_outgoing_args, x86_arch_always_fancy_math_387,
|
||||
x86_sse_partial_reg_dependency, x86_sse_typeless_stores,
|
||||
x86_use_ffreep, x86_use_incdec, x86_four_jump_limit,
|
||||
x86_schedule, x86_use_bt, x86_cmpxchg16b, x86_pad_returns):
|
||||
Enable/disable for amdfam10.
|
||||
(override_options): Add amdfam10_cost to processor_target_table.
|
||||
Set up PROCESSOR_AMDFAM10 for amdfam10 entry in
|
||||
processor_alias_table.
|
||||
(ix86_issue_rate): Add PROCESSOR_AMDFAM10.
|
||||
(ix86_adjust_cost): Add code for amdfam10.
|
||||
|
||||
2007-02-05 Harsha Jagasia <harsha.jagasia@amd.com>
|
||||
|
||||
* config/i386/i386.opt: Add new Advanced Bit Manipulation (-mabm)
|
||||
instruction set feature flag. Add new (-mpopcnt) flag for popcnt
|
||||
instruction. Add new SSE4A (-msse4a) instruction set feature flag.
|
||||
* config/i386/i386.h: Add builtin definition for SSE4A.
|
||||
* config/i386/i386.md: Add support for ABM instructions
|
||||
(popcnt and lzcnt).
|
||||
* config/i386/sse.md: Add support for SSE4A instructions
|
||||
(movntss, movntsd, extrq, insertq).
|
||||
* config/i386/i386.c: Add support for ABM and SSE4A builtins.
|
||||
Add -march=amdfam10 flag.
|
||||
* config/i386/ammintrin.h: Add support for SSE4A intrinsics.
|
||||
* doc/invoke.texi: Add documentation on flags for sse4a, abm, popcnt
|
||||
and amdfam10.
|
||||
* doc/extend.texi: Add documentation for SSE4A builtins.
|
||||
|
||||
2007-01-24 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.h (x86_cmpxchg16b): Remove const.
|
||||
(TARGET_CMPXCHG16B): Define to x86_cmpxchg16b.
|
||||
* config/i386/i386.c (x86_cmpxchg16b): Remove const.
|
||||
(override_options): Add PTA_CX16 flag. Set x86_cmpxchg16b
|
||||
for CPUs that have PTA_CX16 set.
|
||||
|
||||
2007-01-18 Michael Meissner <michael.meissner@amd.com>
|
||||
|
||||
* config/i386/i386.c (ix86_compute_frame_layout): Make fprintf's
|
||||
in #if 0 code type correct.
|
||||
|
||||
2007-01-17 Eric Christopher <echristo@apple.com> (r120846)
|
||||
|
||||
* config.gcc: Support core2 processor.
|
||||
@ -62,7 +263,30 @@
|
||||
x86_pad_returns): Add m_CORE2.
|
||||
(override_options): Add entries for Core2.
|
||||
(ix86_issue_rate): Add case for Core2.
|
||||
|
||||
2006-10-28 Uros Bizjak <uros@kss-loka.si>
|
||||
|
||||
* config/i386/i386.h (GENERAL_REGNO_P): Use STACK_POINTER_REGNUM.
|
||||
(NON_QI_REG_P): Use IN_RANGE.
|
||||
(REX_INT_REGNO_P): Use IN_RANGE.
|
||||
(FP_REGNO_P): Use IN_RANGE.
|
||||
(SSE_REGNO_P): Use IN_RANGE.
|
||||
(REX_SSE_REGNO_P): Use IN_RANGE.
|
||||
(MMX_REGNO_P): Use IN_RANGE.
|
||||
(STACK_REGNO_P): New macro.
|
||||
(STACK_REG_P): Use STACK_REGNO_P.
|
||||
(NON_STACK_REG_P): Use STACK_REGNO_P.
|
||||
(REGNO_OK_FOR_INDEX_P): Use REX_INT_REGNO_P.
|
||||
(REGNO_OK_FOR_BASE_P): Use GENERAL_REGNO_P.
|
||||
(REG_OK_FOR_INDEX_NONSTRICT_P): Use REX_INT_REGNO_P.
|
||||
(REG_OK_FOR_BASE_NONSTRICT_P): Use GENERAL_REGNO_P.
|
||||
(HARD_REGNO_RENAME_OK): Use !IN_RANGE.
|
||||
|
||||
2006-10-28 Uros Bizjak <uros@kss-loka.si>
|
||||
|
||||
* config/i386/i386.c (output_387_ffreep): Create output from a
|
||||
template string for !HAVE_AS_IX86_FFREEP.
|
||||
|
||||
2006-10-27 Vladimir Makarov <vmakarov@redhat.com> (r118090)
|
||||
|
||||
* config/i386/i386.h (TARGET_GEODE):
|
||||
@ -95,7 +319,31 @@
|
||||
* config/i386/geode.md: New file.
|
||||
|
||||
* doc/invoke.texi: Add entry about geode processor.
|
||||
|
||||
|
||||
2006-10-24 Uros Bizjak <uros@kss-loka.si>
|
||||
|
||||
* config/i386/i386.h (FIRST_PSEUDO_REGISTER): Define to 54.
|
||||
(FIXED_REGISTERS, CALL_USED_REGISTERS): Add fpcr register.
|
||||
(REG_ALLOC_ORDER): Add one element to allocate fpcr register.
|
||||
(FRAME_POINTER_REGNUM): Update register number to 21.
|
||||
(REG_CLASS_CONTENTS): Update contents for added fpcr register.
|
||||
(HI_REGISTER_NAMES): Add "fpcr" for fpcr register.
|
||||
|
||||
* config/i386/i386.c (regclass_map): Add fpcr entry.
|
||||
(dbx_register_map, dbx64_register_map, svr4_dbx_register_map):
|
||||
Add fpcr entry.
|
||||
(print_reg): Assert REGNO (x) != FPCR_REG.
|
||||
|
||||
* config/i386/i386.md (FPCR_REG, R11_REG): New constants.
|
||||
(DIRFLAG_REG): Renumber.
|
||||
(x86_fnstcw_1, x86_fldcw_1): Use FPCR_REG instead of FPSR_REG.
|
||||
(*sibcall_1_rex64_v, *sibcall_value_1_rex64_v): Use R11_REG.
|
||||
(sse_prologue_save, *sse_prologue_save_insn): Renumber
|
||||
hardcoded SSE register numbers.
|
||||
|
||||
* config/i386/mmx.md (mmx_emms, mmx_femms): Renumber
|
||||
hardcoded MMX register numbers.
|
||||
|
||||
2006-10-24 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR middle-end/28796
|
||||
@ -104,6 +352,17 @@
|
||||
for deciding optimizations in consistency with fold-const.c
|
||||
(fold_builtin_unordered_cmp): Likewise.
|
||||
|
||||
2006-10-24 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* builtins.c (fold_builtin_floor): Fold floor (x) where
|
||||
x is nonnegative to trunc (x).
|
||||
(fold_builtin_int_roundingfn): Fold lfloor (x) where x is
|
||||
nonnegative to FIX_TRUNC_EXPR.
|
||||
|
||||
2006-10-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/tmmintrin.h: Remove the duplicated content.
|
||||
|
||||
2006-10-22 H.J. Lu <hongjiu.lu@intel.com> (r117958)
|
||||
|
||||
* config.gcc (i[34567]86-*-*): Add tmmintrin.h to extra_headers.
|
||||
@ -170,6 +429,18 @@
|
||||
|
||||
* doc/invoke.texi: Document -mssse3/-mno-ssse3 switches.
|
||||
|
||||
2006-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/i386.md (UNSPEC_LDQQU): Renamed to ...
|
||||
(UNSPEC_LDDQU): This.
|
||||
* config/i386/sse.md (sse3_lddqu): Updated.
|
||||
|
||||
2006-10-21 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/3511
|
||||
* tree-ssa-pre.c (phi_translate): Fold CALL_EXPRs that
|
||||
got new invariant arguments during PHI translation.
|
||||
|
||||
2006-10-21 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* builtins.c (fold_builtin_classify): Fix typo.
|
||||
|
@ -7355,6 +7355,12 @@ fold_builtin_ceil (tree fndecl, tree arglist)
|
||||
}
|
||||
}
|
||||
|
||||
/* Fold floor (x) where x is nonnegative to trunc (x). */
|
||||
if (tree_expr_nonnegative_p (arg))
|
||||
return build_function_call_expr (mathfn_built_in (TREE_TYPE (arg),
|
||||
BUILT_IN_TRUNC),
|
||||
arglist);
|
||||
|
||||
return fold_trunc_transparent_mathfn (fndecl, arglist);
|
||||
}
|
||||
|
||||
@ -7442,6 +7448,18 @@ fold_builtin_int_roundingfn (tree fndecl, tree arglist)
|
||||
}
|
||||
}
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
CASE_FLT_FN (BUILT_IN_LFLOOR):
|
||||
CASE_FLT_FN (BUILT_IN_LLFLOOR):
|
||||
/* Fold lfloor (x) where x is nonnegative to FIX_TRUNC (x). */
|
||||
if (tree_expr_nonnegative_p (arg))
|
||||
return fold_build1 (FIX_TRUNC_EXPR, TREE_TYPE (TREE_TYPE (fndecl)),
|
||||
arg);
|
||||
break;
|
||||
default:;
|
||||
}
|
||||
|
||||
return fold_fixed_mathfn (fndecl, arglist);
|
||||
}
|
||||
|
||||
|
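As a hedged illustration of the builtins.c hunks above (example code, not
part of this commit): fabs() yields a known-nonnegative value, so a call
like the one below becomes a candidate for the new floor -> trunc fold, and
the lfloor-family builtins on nonnegative arguments likewise fold to a
direct FIX_TRUNC_EXPR conversion.

	#include <math.h>

	double
	round_down_magnitude (double x)
	{
	  /* floor of a nonnegative value; foldable to trunc (fabs (x)).  */
	  return floor (fabs (x));
	}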
@ -269,12 +269,12 @@ xscale-*-*)
|
||||
i[34567]86-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h"
|
||||
need_64bit_hwint=yes
|
||||
;;
|
||||
ia64-*-*)
|
||||
@ -1209,14 +1209,14 @@ i[34567]86-*-solaris2*)
|
||||
# FIXME: -m64 for i[34567]86-*-* should be allowed just
|
||||
# like -m32 for x86_64-*-*.
|
||||
case X"${with_cpu}" in
|
||||
Xgeneric|Xcore2|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||||
Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
|
||||
;;
|
||||
X)
|
||||
with_cpu=generic
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
|
||||
echo "generic core2 nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2
|
||||
echo "generic core2 nocona x86-64amd fam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@ -2515,6 +2515,9 @@ if test x$with_cpu = x ; then
|
||||
;;
|
||||
i686-*-* | i786-*-*)
|
||||
case ${target_noncanonical} in
|
||||
amdfam10-*|barcelona-*)
|
||||
with_cpu=amdfam10
|
||||
;;
|
||||
k8-*|opteron-*|athlon_64-*)
|
||||
with_cpu=k8
|
||||
;;
|
||||
@ -2555,6 +2558,9 @@ if test x$with_cpu = x ; then
|
||||
;;
|
||||
x86_64-*-*)
|
||||
case ${target_noncanonical} in
|
||||
amdfam10-*|barcelona-*)
|
||||
with_cpu=amdfam10
|
||||
;;
|
||||
k8-*|opteron-*|athlon_64-*)
|
||||
with_cpu=k8
|
||||
;;
|
||||
@ -2795,7 +2801,7 @@ case "${target}" in
|
||||
esac
|
||||
# OK
|
||||
;;
|
||||
"" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
|
||||
"" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
|
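A short sketch of what the config.gcc changes above enable (hypothetical
invocations, not part of this commit): the new CPU names are accepted both
at configure time and on the compiler command line.

	# build a gcc that defaults to the new CPU
	./configure --with-cpu=amdfam10	# "barcelona" is accepted as an alias

	# or select it per compilation
	gcc -O2 -march=amdfam10 -c foo.c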
contrib/gcc/config/i386/ammintrin.h (new file, 73 lines)
@ -0,0 +1,73 @@
|
||||
/* Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||||
Boston, MA 02110-1301, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the AMD Programmers
|
||||
Manual Update, version 2.x */
|
||||
|
||||
#ifndef _AMMINTRIN_H_INCLUDED
|
||||
#define _AMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SSE4A__
|
||||
# error "SSE4A instruction set not enabled"
|
||||
#else
|
||||
|
||||
/* We need definitions from the SSE3, SSE2 and SSE header files*/
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static __inline void __attribute__((__always_inline__))
|
||||
_mm_stream_sd (double * __P, __m128d __Y)
|
||||
{
|
||||
__builtin_ia32_movntsd (__P, (__v2df) __Y);
|
||||
}
|
||||
|
||||
static __inline void __attribute__((__always_inline__))
|
||||
_mm_stream_ss (float * __P, __m128 __Y)
|
||||
{
|
||||
__builtin_ia32_movntss (__P, (__v4sf) __Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_extract_si64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
|
||||
}
|
||||
|
||||
#define _mm_extracti_si64(X, I, L) \
|
||||
((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_insert_si64 (__m128i __X,__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
#define _mm_inserti_si64(X, Y, I, L) \
|
||||
((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
|
||||
|
||||
|
||||
#endif /* __SSE4A__ */
|
||||
|
||||
#endif /* _AMMINTRIN_H_INCLUDED */
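A minimal usage sketch for the new SSE4A intrinsics declared above
(hypothetical example, not part of this commit; requires compiling with
-msse4a so that __SSE4A__ is defined):

	#include <ammintrin.h>

	/* movntsd: non-temporal store of the low double, bypassing the cache.  */
	void
	stream_double (double *dst, __m128d v)
	{
	  _mm_stream_sd (dst, v);
	}

	/* extrq: extract a bit field from the low quadword of x; the field
	   length and starting bit position are taken from ctl.  */
	__m128i
	extract_field (__m128i x, __m128i ctl)
	{
	  return _mm_extract_si64 (x, ctl);
	}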
|
@ -29,6 +29,8 @@
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
|
||||
(define_attr "amdfam10_decode" "direct,vector,double"
|
||||
(const_string "direct"))
|
||||
;;
|
||||
;; decode0 decode1 decode2
|
||||
;; \ | /
|
||||
@ -131,18 +133,22 @@
|
||||
|
||||
;; Jump instructions are executed in the branch unit completely transparent to us
|
||||
(define_insn_reservation "athlon_branch" 0
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "ibr"))
|
||||
"athlon-direct,athlon-ieu")
|
||||
(define_insn_reservation "athlon_call" 0
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"athlon-vector,athlon-ieu")
|
||||
(define_insn_reservation "athlon_call_amdfam10" 0
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"athlon-double,athlon-ieu")
|
||||
|
||||
;; Latency of push operation is 3 cycles, but ESP value is available
|
||||
;; earlier
|
||||
(define_insn_reservation "athlon_push" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "push"))
|
||||
"athlon-direct,athlon-agu,athlon-store")
|
||||
(define_insn_reservation "athlon_pop" 4
|
||||
@ -153,12 +159,16 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "pop"))
|
||||
"athlon-double,(athlon-ieu+athlon-load)")
|
||||
(define_insn_reservation "athlon_pop_amdfam10" 3
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "pop"))
|
||||
"athlon-direct,(athlon-ieu+athlon-load)")
|
||||
(define_insn_reservation "athlon_leave" 3
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "leave"))
|
||||
"athlon-vector,(athlon-ieu+athlon-load)")
|
||||
(define_insn_reservation "athlon_leave_k8" 3
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(eq_attr "type" "leave"))
|
||||
"athlon-double,(athlon-ieu+athlon-load)")
|
||||
|
||||
@ -167,6 +177,11 @@
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "lea"))
|
||||
"athlon-direct,athlon-agu,nothing")
|
||||
;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
|
||||
(define_insn_reservation "athlon_lea_amdfam10" 1
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "lea"))
|
||||
"athlon-direct,athlon-agu,nothing")
|
||||
|
||||
;; Mul executes in special multiplier unit attached to IEU0
|
||||
(define_insn_reservation "athlon_imul" 5
|
||||
@ -176,29 +191,35 @@
|
||||
"athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
|
||||
;; ??? Widening multiply is vector or double.
|
||||
(define_insn_reservation "athlon_imul_k8_DI" 4
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
|
||||
(define_insn_reservation "athlon_imul_k8" 3
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
|
||||
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
|
||||
(define_insn_reservation "athlon_imul_mem" 8
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
|
||||
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imul")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "memory" "load,both"))))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
|
||||
(define_insn_reservation "athlon_imul_mem_k8" 6
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imul")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
|
||||
@ -209,21 +230,23 @@
|
||||
;; other instructions.
|
||||
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
|
||||
;; of the other code
|
||||
;; Using the same heuristics for amdfam10 as K8 with idiv
|
||||
|
||||
(define_insn_reservation "athlon_idiv" 6
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "none,unknown")))
|
||||
"athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
|
||||
(define_insn_reservation "athlon_idiv_mem" 9
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "memory" "load,both")))
|
||||
"athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
|
||||
;; The parallelism of string instructions is not documented. Model it same way
|
||||
;; as idiv to create smaller automata. This probably does not matter much.
|
||||
;; Using the same heuristics for amdfam10 as K8 with idiv
|
||||
(define_insn_reservation "athlon_str" 6
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both,store")))
|
||||
"athlon-vector,athlon-load,athlon-ieu0*6")
|
||||
@ -234,34 +257,62 @@
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-direct,athlon-ieu")
|
||||
(define_insn_reservation "athlon_idirect_amdfam10" 1
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-direct,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-vector,athlon-ieu,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "none,unknown"))))
|
||||
"athlon-vector,athlon-ieu,athlon-ieu")
|
||||
|
||||
(define_insn_reservation "athlon_idirect_loadmov" 3
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-load")
|
||||
|
||||
(define_insn_reservation "athlon_idirect_load" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-load,athlon-ieu")
|
||||
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-load,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector_load" 6
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
|
||||
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
|
||||
|
||||
(define_insn_reservation "athlon_idirect_movstore" 1
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "store")))
|
||||
"athlon-direct,athlon-agu,athlon-store")
|
||||
|
||||
(define_insn_reservation "athlon_idirect_both" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
@ -270,6 +321,15 @@
|
||||
"athlon-direct,athlon-load,
|
||||
athlon-ieu,athlon-store,
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
"athlon-direct,athlon-load,
|
||||
athlon-ieu,athlon-store,
|
||||
athlon-store")
|
||||
|
||||
(define_insn_reservation "athlon_ivector_both" 6
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
@ -279,6 +339,16 @@
|
||||
athlon-ieu,
|
||||
athlon-ieu,
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "both"))))
|
||||
"athlon-vector,athlon-load,
|
||||
athlon-ieu,
|
||||
athlon-ieu,
|
||||
athlon-store")
|
||||
|
||||
(define_insn_reservation "athlon_idirect_store" 1
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
@ -286,6 +356,14 @@
|
||||
(eq_attr "memory" "store"))))
|
||||
"athlon-direct,(athlon-ieu+athlon-agu),
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
"athlon-direct,(athlon-ieu+athlon-agu),
|
||||
athlon-store")
|
||||
|
||||
(define_insn_reservation "athlon_ivector_store" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
@ -293,6 +371,13 @@
|
||||
(eq_attr "memory" "store"))))
|
||||
"athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
|
||||
athlon-store")
|
||||
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "unit" "integer,unknown")
|
||||
(eq_attr "memory" "store"))))
|
||||
"athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
|
||||
athlon-store")
|
||||
|
||||
;; Athlon floatin point unit
|
||||
(define_insn_reservation "athlon_fldxf" 12
|
||||
@ -302,7 +387,7 @@
|
||||
(eq_attr "mode" "XF"))))
|
||||
"athlon-vector,athlon-fpload2,athlon-fvector*9")
|
||||
(define_insn_reservation "athlon_fldxf_k8" 13
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "mode" "XF"))))
|
||||
@ -314,7 +399,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fany")
|
||||
(define_insn_reservation "athlon_fld_k8" 2
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
@ -326,7 +411,7 @@
|
||||
(eq_attr "mode" "XF"))))
|
||||
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
|
||||
(define_insn_reservation "athlon_fstxf_k8" 8
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "store,both")
|
||||
(eq_attr "mode" "XF"))))
|
||||
@ -337,16 +422,16 @@
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fst_k8" 2
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fist" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fistp,fisttp"))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_fmov" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fmov"))
|
||||
"athlon-direct,athlon-fpsched,athlon-faddmul")
|
||||
(define_insn_reservation "athlon_fadd_load" 4
|
||||
@ -355,12 +440,12 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fadd_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fop")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fadd" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fop"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fmul_load" 4
|
||||
@ -369,16 +454,16 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fmul_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fmul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fmul" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fmul"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fsgn" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fsgn"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_load" 24
|
||||
@ -387,7 +472,7 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_load_k8" 13
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fdiv")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
@ -396,16 +481,16 @@
|
||||
(eq_attr "type" "fdiv"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fdiv_k8" 11
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_fpspc_load" 103
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fpspc")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fpload,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fpspc" 100
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fpspc"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_load" 7
|
||||
@ -418,12 +503,12 @@
|
||||
(eq_attr "type" "fcmov"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_load_k8" 17
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fcmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fvector")
|
||||
(define_insn_reservation "athlon_fcmov_k8" 15
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fcmov"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector")
|
||||
;; fcomi is vector decoded by uses only one pipe.
|
||||
@ -434,13 +519,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcomi_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcomi" 3
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "athlon_decode" "vector")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"athlon-vector,athlon-fpsched,athlon-fadd")
|
||||
@ -450,18 +535,18 @@
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcom_load_k8" 4
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_fcom" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(eq_attr "type" "fcmp"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
;; Never seen by the scheduler because we still don't do post reg-stack
|
||||
;; scheduling.
|
||||
;(define_insn_reservation "athlon_fxch" 2
|
||||
; (and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
; (eq_attr "type" "fxch"))
|
||||
; "athlon-direct,athlon-fpsched,athlon-fany")
|
||||
|
||||
@ -516,6 +601,23 @@
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
|
||||
;; loads generated are direct path, latency of 2 and do not use any FP
|
||||
;; executions units. No seperate entries for movlpx/movhpx loads, which
|
||||
;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
|
||||
;; as they will not be generated.
|
||||
(define_insn_reservation "athlon_sseld_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8")
|
||||
;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
|
||||
;; and can use any FP executions units
|
||||
(define_insn_reservation "athlon_mmxld_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "mmxmov")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8, athlon-fany")
|
||||
(define_insn_reservation "athlon_mmxssest" 3
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
@ -533,6 +635,25 @@
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
;; On AMDFAM10 all double, single and integer packed SSEx data stores
|
||||
;; generated are all double path, latency of 2 and use the FSTORE FP
|
||||
;; execution unit. No entries seperate for movupx/movdqu, which are
|
||||
;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
|
||||
;; as they will not be generated.
|
||||
(define_insn_reservation "athlon_ssest_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "store,both"))))
|
||||
"athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
|
||||
;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
|
||||
;; data stores generated are all direct path, latency of 2 and use
|
||||
;; the FSTORE FP execution unit
|
||||
(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "mmxmov,ssemov")
|
||||
(eq_attr "memory" "store,both")))
|
||||
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
|
||||
(define_insn_reservation "athlon_movaps_k8" 2
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "type" "ssemov")
|
||||
@ -578,6 +699,11 @@
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_sselog_load_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sselog,sselog1")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
|
||||
(define_insn_reservation "athlon_sselog" 3
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
@ -586,6 +712,11 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
"athlon-double,athlon-fpsched,athlon-fmul")
|
||||
(define_insn_reservation "athlon_sselog_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "sselog,sselog1"))
|
||||
"athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
|
||||
|
||||
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
|
||||
(define_insn_reservation "athlon_ssecmp_load" 2
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
@ -594,13 +725,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecmp_load_k8" 4
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(and (eq_attr "mode" "SF,DF,DI,TI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecmp" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "mode" "SF,DF,DI,TI")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
@ -614,6 +745,11 @@
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecmp")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecmpvector" 3
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
@ -622,6 +758,10 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi_load" 4
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
@ -632,10 +772,20 @@
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecomi")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(eq_attr "type" "ssecmp"))
|
||||
"athlon-vector,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_ssecomi_amdfam10" 3
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
|
||||
(eq_attr "type" "ssecomi"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd_load" 4
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
@ -643,13 +793,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(and (eq_attr "mode" "SF,DF,DI")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseadd" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "mode" "SF,DF,DI")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
@ -663,6 +813,11 @@
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseadd")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fadd")
|
||||
(define_insn_reservation "athlon_sseaddvector" 5
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "sseadd"))
|
||||
@ -671,6 +826,10 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "sseadd"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
|
||||
(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "sseadd"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fadd")
|
||||
|
||||
;; Conversions behaves very irregularly and the scheduling is critical here.
|
||||
;; Take each instruction separately. Assume that the mode is always set to the
|
||||
@ -684,12 +843,25 @@
|
||||
(and (eq_attr "mode" "DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "direct")
|
||||
(eq_attr "mode" "DF"))))
|
||||
"athlon-direct,athlon-fpsched,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(eq_attr "mode" "DF"))))
|
||||
"athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
|
||||
;; cvtps2pd. Model same way the other double decoded FP conversions.
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
@ -698,12 +870,25 @@
|
||||
(and (eq_attr "mode" "V2DF,V4SF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(and (eq_attr "mode" "V2DF,V4SF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "athlon_decode" "double")
|
||||
(eq_attr "mode" "V2DF,V4SF,TI"))))
|
||||
"athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "direct")
|
||||
(eq_attr "mode" "V2DF,V4SF,TI"))))
|
||||
"athlon-direct,athlon-fpsched,athlon-fstore")
|
||||
;; cvtsi2sd mem,reg is directpath path (cvtsi2sd reg,reg is doublepath)
|
||||
;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
|
||||
@ -713,6 +898,13 @@
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsi2ss mem, reg is doublepath
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
@ -728,6 +920,13 @@
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-fstore*2)")
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
@ -736,6 +935,13 @@
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-double,athlon-fploadk8,athlon-fstore")
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsi2ss reg, reg is doublepath
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
@ -744,6 +950,13 @@
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
|
||||
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
|
||||
(and (eq_attr "cpu" "k8,athlon,generic64")
|
||||
@ -752,6 +965,13 @@
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-fstore*3)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
@ -760,6 +980,13 @@
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fpsched,(athlon-fvector*3)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "vector")
|
||||
(and (eq_attr "mode" "SF")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
@ -767,6 +994,13 @@
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10
|
||||
;; ??? Why it is fater than cvtsd2ss?
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
|
||||
@ -776,6 +1010,13 @@
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-vector,athlon-fpsched,athlon-fvector*2")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssecvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "V4SF,V2DF,TI")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9
|
||||
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
@ -784,6 +1025,13 @@
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-vector,athlon-fploadk8,athlon-fvector")
|
||||
(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
|
||||
;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
@ -799,6 +1047,29 @@
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-double,athlon-fpsched,athlon-fstore")
|
||||
(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
|
||||
;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 9 on amdfam10
|
||||
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "TI")
|
||||
(eq_attr "memory" "load")))))
|
||||
"athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
|
||||
;; cvtpd2dq reg,mem is doublepath, troughput 1, latency 7 on amdfam10
|
||||
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseicvt")
|
||||
(and (eq_attr "amdfam10_decode" "double")
|
||||
(and (eq_attr "mode" "TI")
|
||||
(eq_attr "memory" "none")))))
|
||||
"athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
|
||||
|
||||
|
||||
(define_insn_reservation "athlon_ssemul_load" 4
|
||||
@ -808,13 +1079,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul")
|
||||
(define_insn_reservation "athlon_ssemul_load_k8" 6
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
(define_insn_reservation "athlon_ssemul" 4
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(eq_attr "mode" "SF,DF")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
@ -828,6 +1099,11 @@
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssemul")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul")
|
||||
(define_insn_reservation "athlon_ssemulvector" 5
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "ssemul"))
|
||||
@ -836,6 +1112,10 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssemul"))
|
||||
"athlon-double,athlon-fpsched,(athlon-fmul*2)")
|
||||
(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "ssemul"))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul")
|
||||
;; divsd timings. divss is faster
|
||||
(define_insn_reservation "athlon_ssediv_load" 20
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
@ -844,13 +1124,13 @@
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fpload,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_ssediv_load_k8" 22
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(and (eq_attr "cpu" "k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(and (eq_attr "mode" "SF,DF")
|
||||
(eq_attr "memory" "load"))))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_ssediv" 20
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64")
|
||||
(and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(eq_attr "mode" "SF,DF")))
|
||||
"athlon-direct,athlon-fpsched,athlon-fmul*17")
|
||||
@ -864,6 +1144,11 @@
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-double,athlon-fpload2k8,athlon-fmul*34")
|
||||
(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "ssediv")
|
||||
(eq_attr "memory" "load")))
|
||||
"athlon-direct,athlon-fploadk8,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_ssedivvector" 39
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "ssediv"))
|
||||
@ -872,3 +1157,12 @@
|
||||
(and (eq_attr "cpu" "k8,generic64")
|
||||
(eq_attr "type" "ssediv"))
|
||||
"athlon-double,athlon-fmul*34")
|
||||
(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(eq_attr "type" "ssediv"))
|
||||
"athlon-direct,athlon-fmul*17")
|
||||
(define_insn_reservation "athlon_sseins_amdfam10" 5
|
||||
(and (eq_attr "cpu" "amdfam10")
|
||||
(and (eq_attr "type" "sseins")
|
||||
(eq_attr "mode" "TI")))
|
||||
"athlon-vector,athlon-fpsched,athlon-faddmul")
|
||||
@ -1,5 +1,5 @@
/* Subroutines for the gcc driver.
Copyright (C) 2006 Free Software Foundation, Inc.
Copyright (C) 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

@ -40,6 +40,7 @@ const char *host_detect_local_cpu (int argc, const char **argv);

#define bit_SSE3 (1 << 0)
#define bit_SSSE3 (1 << 9)
#define bit_SSE4a (1 << 6)
#define bit_CMPXCHG16B (1 << 13)

#define bit_3DNOW (1 << 31)
@ -68,7 +69,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int ext_level;
unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0;
unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0;
unsigned char has_longmode = 0, has_cmpxchg8b = 0;
unsigned char has_longmode = 0, has_cmpxchg8b = 0, has_sse4a = 0;
unsigned char is_amd = 0;
unsigned int family = 0;
bool arch;
@ -120,6 +121,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_3dnow = !!(edx & bit_3DNOW);
has_3dnowp = !!(edx & bit_3DNOWP);
has_longmode = !!(edx & bit_LM);
has_sse4a = !!(ecx & bit_SSE4a);
}

is_amd = vendor == *(unsigned int*)"Auth";
@ -132,6 +134,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_ATHLON;
if (has_sse2 || has_longmode)
processor = PROCESSOR_K8;
if (has_sse4a)
processor = PROCESSOR_AMDFAM10;
}
else
{
@ -266,6 +270,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_NOCONA:
cpu = "nocona";
break;
case PROCESSOR_AMDFAM10:
cpu = "amdfam10";
break;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
cpu = "generic";
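The detection added above boils down to one extra CPUID bit. For reference, a minimal, self-contained sketch of that probe (illustrative only, not taken from the commit; it assumes a GCC-compatible compiler with inline asm and reuses the bit_SSE4a value defined above):

/* Illustrative sketch: query the AMD extended CPUID leaf the same way
   host_detect_local_cpu () does and report whether SSE4a is present.
   bit_SSE4a matches the define above (CPUID 0x80000001, ECX bit 6).  */
#include <stdio.h>

#define bit_SSE4a (1 << 6)

static void
cpuid (unsigned int level, unsigned int *eax, unsigned int *ebx,
       unsigned int *ecx, unsigned int *edx)
{
  __asm__ ("cpuid"
           : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
           : "0" (level));
}

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  cpuid (0x80000000, &eax, &ebx, &ecx, &edx);
  if (eax < 0x80000001)
    return 1;
  cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
  printf ("SSE4a: %s\n", (ecx & bit_SSE4a) ? "yes" : "no");
  return 0;
}

When the bit is set, host_detect_local_cpu () hands back "amdfam10" through the PROCESSOR_AMDFAM10 case above instead of stopping at "k8".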
@ -548,6 +548,71 @@ struct processor_costs k8_cost = {
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
struct processor_costs amdfam10_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
COSTS_N_INSNS (2), /* cost of a lea instruction */
|
||||
COSTS_N_INSNS (1), /* variable shift costs */
|
||||
COSTS_N_INSNS (1), /* constant shift costs */
|
||||
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
||||
COSTS_N_INSNS (4), /* HI */
|
||||
COSTS_N_INSNS (3), /* SI */
|
||||
COSTS_N_INSNS (4), /* DI */
|
||||
COSTS_N_INSNS (5)}, /* other */
|
||||
0, /* cost of multiply per each bit set */
|
||||
{COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
|
||||
COSTS_N_INSNS (35), /* HI */
|
||||
COSTS_N_INSNS (51), /* SI */
|
||||
COSTS_N_INSNS (83), /* DI */
|
||||
COSTS_N_INSNS (83)}, /* other */
|
||||
COSTS_N_INSNS (1), /* cost of movsx */
|
||||
COSTS_N_INSNS (1), /* cost of movzx */
|
||||
8, /* "large" insn */
|
||||
9, /* MOVE_RATIO */
|
||||
4, /* cost for loading QImode using movzbl */
|
||||
{3, 4, 3}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{3, 4, 3}, /* cost of storing integer registers */
|
||||
4, /* cost of reg,reg fld/fst */
|
||||
{4, 4, 12}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
{6, 6, 8}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode */
|
||||
2, /* cost of moving MMX register */
|
||||
{3, 3}, /* cost of loading MMX registers
|
||||
in SImode and DImode */
|
||||
{4, 4}, /* cost of storing MMX registers
|
||||
in SImode and DImode */
|
||||
2, /* cost of moving SSE register */
|
||||
{4, 4, 3}, /* cost of loading SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
{4, 4, 5}, /* cost of storing SSE registers
|
||||
in SImode, DImode and TImode */
|
||||
3, /* MMX or SSE register to integer */
|
||||
/* On K8:
|
||||
MOVD reg64, xmmreg Double FSTORE 4
|
||||
MOVD reg32, xmmreg Double FSTORE 4
|
||||
On AMDFAM10:
|
||||
MOVD reg64, xmmreg Double FADD 3
|
||||
1/1 1/1
|
||||
MOVD reg32, xmmreg Double FADD 3
|
||||
1/1 1/1 */
|
||||
64, /* size of prefetch block */
|
||||
/* New AMD processors never drop prefetches; if they cannot be performed
|
||||
immediately, they are queued. We set number of simultaneous prefetches
|
||||
to a large constant to reflect this (it probably is not a good idea not
|
||||
to limit number of prefetches at all, as their execution also takes some
|
||||
time). */
|
||||
100, /* number of parallel prefetches */
|
||||
2, /* Branch cost */
|
||||
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
|
||||
};
|
||||
|
||||
static const
|
||||
struct processor_costs pentium4_cost = {
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction */
|
||||
@ -834,11 +899,13 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
||||
#define m_K8 (1<<PROCESSOR_K8)
|
||||
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
|
||||
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
|
||||
#define m_NOCONA (1<<PROCESSOR_NOCONA)
|
||||
#define m_CORE2 (1<<PROCESSOR_CORE2)
|
||||
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
|
||||
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
|
||||
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
|
||||
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
|
||||
|
||||
/* Generic instruction choice should be common subset of supported CPUs
|
||||
(PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
|
||||
@ -846,22 +913,30 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
||||
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
|
||||
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
|
||||
generic because it is not working well with PPro base chips. */
|
||||
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
|
||||
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
|
||||
| m_GENERIC64;
|
||||
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
|
||||
| m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_zero_extend_with_and = m_486 | m_PENT;
|
||||
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
|
||||
/* Enable to zero extend integer registers to avoid partial dependencies */
|
||||
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
|
||||
const int x86_double_with_add = ~m_386;
|
||||
const int x86_use_bit_test = m_386;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
|
||||
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
|
||||
const int x86_3dnow_a = m_ATHLON_K8;
|
||||
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
|
||||
| m_K6 | m_CORE2 | m_GENERIC;
|
||||
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
|
||||
| m_NOCONA;
|
||||
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
|
||||
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
|
||||
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
/* Branch hints were put in P4 based on simulation result. But
|
||||
after P4 was made, no performance benefit was observed with
|
||||
branch hints. It also increases the code size. As the result,
|
||||
icc never generates branch hints. */
|
||||
const int x86_branch_hints = 0;
|
||||
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
|
||||
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
|
||||
/*m_GENERIC | m_ATHLON_K8 ? */
|
||||
/* We probably ought to watch for partial register stalls on Generic32
|
||||
compilation setting as well. However in current implementation the
|
||||
partial register stalls are not eliminated very well - they can
|
||||
@ -873,13 +948,16 @@ const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
|
||||
const int x86_partial_reg_stall = m_PPRO;
|
||||
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
|
||||
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
|
||||
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
|
||||
| m_CORE2 | m_GENERIC);
|
||||
const int x86_use_mov0 = m_K6;
|
||||
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
|
||||
const int x86_read_modify_write = ~m_PENT;
|
||||
const int x86_read_modify = ~(m_PENT | m_PPRO);
|
||||
const int x86_split_long_moves = m_PPRO;
|
||||
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
|
||||
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
|
||||
| m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
|
||||
/* m_PENT4 ? */
|
||||
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
|
||||
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
|
||||
const int x86_qimode_math = ~(0);
|
||||
@ -889,18 +967,37 @@ const int x86_promote_qi_regs = 0;
|
||||
if our scheme for avoiding partial stalls was more effective. */
|
||||
const int x86_himode_math = ~(m_PPRO);
|
||||
const int x86_promote_hi_regs = m_PPRO;
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
|
||||
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC;
|
||||
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
|
||||
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC;
|
||||
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
|
||||
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
/* Enable if integer moves are preferred for DFmode copies */
|
||||
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
|
||||
| m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
|
||||
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC;
|
||||
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
|
||||
| m_CORE2 | m_GENERIC;
|
||||
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
|
||||
for outgoing arguments will be computed and placed into the variable
|
||||
`current_function_outgoing_args_size'. No space will be pushed onto the stack
|
||||
for each call; instead, the function prologue should increase the stack frame
|
||||
size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
|
||||
not proper. */
|
||||
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
|
||||
| m_NOCONA | m_PPRO | m_CORE2
|
||||
| m_GENERIC;
|
||||
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_shift1 = ~m_486;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
|
||||
| m_ATHLON_K8_AMDFAM10 | m_PENT4
|
||||
| m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
|
||||
that thread 128bit SSE registers as single units versus K8 based chips that
|
||||
divide SSE registers to two 64bit halves.
|
||||
@ -910,15 +1007,67 @@ const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PEN
|
||||
this option on P4 brings over 20% SPECfp regression, while enabling it on
|
||||
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
|
||||
of moves. */
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
||||
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
||||
| m_GENERIC | m_AMDFAM10;
|
||||
/* Set for machines where the type and dependencies are resolved on SSE
|
||||
register parts instead of whole registers, so we may maintain just
|
||||
lower part of scalar values in proper format leaving the upper part
|
||||
undefined. */
|
||||
const int x86_sse_split_regs = m_ATHLON_K8;
|
||||
const int x86_sse_typeless_stores = m_ATHLON_K8;
|
||||
/* Code generation for scalar reg-reg moves of single and double precision data:
|
||||
if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
|
||||
movaps reg, reg
|
||||
else
|
||||
movss reg, reg
|
||||
if (x86_sse_partial_reg_dependency == true)
|
||||
movapd reg, reg
|
||||
else
|
||||
movsd reg, reg
|
||||
|
||||
Code generation for scalar loads of double precision data:
|
||||
if (x86_sse_split_regs == true)
|
||||
movlpd mem, reg (gas syntax)
|
||||
else
|
||||
movsd mem, reg
|
||||
|
||||
Code generation for unaligned packed loads of single precision data
|
||||
(x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
|
||||
if (x86_sse_unaligned_move_optimal)
|
||||
movups mem, reg
|
||||
|
||||
if (x86_sse_partial_reg_dependency == true)
|
||||
{
|
||||
xorps reg, reg
|
||||
movlps mem, reg
|
||||
movhps mem+8, reg
|
||||
}
|
||||
else
|
||||
{
|
||||
movlps mem, reg
|
||||
movhps mem+8, reg
|
||||
}
|
||||
|
||||
Code generation for unaligned packed loads of double precision data
|
||||
(x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
|
||||
if (x86_sse_unaligned_move_optimal)
|
||||
movupd mem, reg
|
||||
|
||||
if (x86_sse_split_regs == true)
|
||||
{
|
||||
movlpd mem, reg
|
||||
movhpd mem+8, reg
|
||||
}
|
||||
else
|
||||
{
|
||||
movsd mem, reg
|
||||
movhpd mem+8, reg
|
||||
}
|
||||
*/
|
||||
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
|
||||
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
|
||||
|
||||
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
|
||||
const int x86_use_ffreep = m_ATHLON_K8;
|
||||
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
|
||||
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
|
||||
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
|
||||
|
||||
@ -926,21 +1075,22 @@ const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
|
||||
integer data in xmm registers. Which results in pretty abysmal code. */
|
||||
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
|
||||
|
||||
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
|
||||
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4
|
||||
| m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC32;
|
||||
/* Some CPU cores are not able to predict more than 4 branch instructions in
|
||||
the 16 byte window. */
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
|
||||
const int x86_use_bt = m_ATHLON_K8;
|
||||
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4
|
||||
| m_NOCONA | m_CORE2 | m_GENERIC;
|
||||
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT
|
||||
| m_CORE2 | m_GENERIC;
|
||||
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
|
||||
/* Compare and exchange was added for 80486. */
|
||||
const int x86_cmpxchg = ~m_386;
|
||||
/* Compare and exchange 8 bytes was added for pentium. */
|
||||
const int x86_cmpxchg8b = ~(m_386 | m_486);
|
||||
/* Compare and exchange 16 bytes was added for nocona. */
|
||||
const int x86_cmpxchg16b = m_NOCONA | m_CORE2;
|
||||
/* Exchange and add was added for 80486. */
|
||||
const int x86_xadd = ~m_386;
|
||||
const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
|
||||
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
|
||||
|
||||
/* In case the average insn count for single function invocation is
|
||||
lower than this constant, emit fast (but longer) prologue and
|
||||
@ -966,8 +1116,8 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
|
||||
FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
|
||||
/* arg pointer */
|
||||
NON_Q_REGS,
|
||||
/* flags, fpsr, dirflag, frame */
|
||||
NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
|
||||
/* flags, fpsr, fpcr, dirflag, frame */
|
||||
NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
|
||||
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
|
||||
SSE_REGS, SSE_REGS,
|
||||
MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
|
||||
@ -984,7 +1134,7 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
|
||||
{
|
||||
0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
|
||||
12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
|
||||
-1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
|
||||
-1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
|
||||
21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
|
||||
29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
|
||||
@ -1007,7 +1157,7 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
|
||||
33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
|
||||
-1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
|
||||
-1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
|
||||
17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
|
||||
41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
|
||||
8,9,10,11,12,13,14,15, /* extended integer registers */
|
||||
@ -1072,7 +1222,7 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
|
||||
{
|
||||
0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
|
||||
11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
|
||||
-1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
|
||||
-1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
|
||||
21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
|
||||
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
|
||||
@ -1157,6 +1307,9 @@ enum processor_type ix86_arch;
|
||||
/* true if sse prefetch instruction is not NOOP. */
|
||||
int x86_prefetch_sse;
|
||||
|
||||
/* true if cmpxchg16b is supported. */
|
||||
int x86_cmpxchg16b;
|
||||
|
||||
/* ix86_regparm_string as a number */
|
||||
static int ix86_regparm;
|
||||
|
||||
@ -1511,8 +1664,8 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
||||
case OPT_msse:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
|
||||
target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
|
||||
target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
|
||||
target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -1527,11 +1680,12 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
||||
case OPT_msse3:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~MASK_SSSE3;
|
||||
target_flags_explicit |= MASK_SSSE3;
|
||||
target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
|
||||
target_flags_explicit |= (MASK_SSSE3 | MASK_SSE4A);
|
||||
}
|
||||
return true;
|
||||
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
@ -1580,7 +1734,8 @@ override_options (void)
|
||||
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
|
||||
{&core2_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
|
||||
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
|
||||
{&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
|
||||
};
|
||||
|
||||
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
|
||||
@ -1598,7 +1753,11 @@ override_options (void)
|
||||
PTA_3DNOW = 32,
|
||||
PTA_3DNOW_A = 64,
|
||||
PTA_64BIT = 128,
|
||||
PTA_SSSE3 = 256
|
||||
PTA_SSSE3 = 256,
|
||||
PTA_CX16 = 512,
|
||||
PTA_POPCNT = 1024,
|
||||
PTA_ABM = 2048,
|
||||
PTA_SSE4A = 4096
|
||||
} flags;
|
||||
}
|
||||
const processor_alias_table[] =
|
||||
@ -1625,10 +1784,10 @@ override_options (void)
|
||||
{"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
| PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
|
||||
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
|
||||
| PTA_64BIT | PTA_MMX
|
||||
| PTA_PREFETCH_SSE},
|
||||
| PTA_PREFETCH_SSE | PTA_CX16},
|
||||
{"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
|
||||
| PTA_3DNOW_A},
|
||||
{"k6", PROCESSOR_K6, PTA_MMX},
|
||||
@ -1663,6 +1822,15 @@
| PTA_SSE3 },
{"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_64BIT | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
| PTA_ABM | PTA_SSE4A | PTA_CX16},
{"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_64BIT | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
| PTA_ABM | PTA_SSE4A | PTA_CX16},
{"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
};
|
||||
@ -1825,6 +1993,18 @@ override_options (void)
|
||||
target_flags |= MASK_SSSE3;
|
||||
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
|
||||
x86_prefetch_sse = true;
|
||||
if (processor_alias_table[i].flags & PTA_CX16)
|
||||
x86_cmpxchg16b = true;
|
||||
if (processor_alias_table[i].flags & PTA_POPCNT
|
||||
&& !(target_flags_explicit & MASK_POPCNT))
|
||||
target_flags |= MASK_POPCNT;
|
||||
if (processor_alias_table[i].flags & PTA_ABM
|
||||
&& !(target_flags_explicit & MASK_ABM))
|
||||
target_flags |= MASK_ABM;
|
||||
if (processor_alias_table[i].flags & PTA_SSE4A
|
||||
&& !(target_flags_explicit & MASK_SSE4A))
|
||||
target_flags |= MASK_SSE4A;
|
||||
|
||||
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
|
||||
error ("CPU you selected does not support x86-64 "
|
||||
"instruction set");
|
||||
@ -2003,6 +2183,10 @@ override_options (void)
|
||||
if (TARGET_SSSE3)
|
||||
target_flags |= MASK_SSE3;
|
||||
|
||||
/* Turn on SSE3 builtins for -msse4a. */
|
||||
if (TARGET_SSE4A)
|
||||
target_flags |= MASK_SSE3;
|
||||
|
||||
/* Turn on SSE2 builtins for -msse3. */
|
||||
if (TARGET_SSE3)
|
||||
target_flags |= MASK_SSE2;
|
||||
@ -2022,6 +2206,10 @@ override_options (void)
|
||||
if (TARGET_3DNOW)
|
||||
target_flags |= MASK_MMX;
|
||||
|
||||
/* Turn on POPCNT builtins for -mabm. */
|
||||
if (TARGET_ABM)
|
||||
target_flags |= MASK_POPCNT;
|
||||
|
||||
if (TARGET_64BIT)
|
||||
{
|
||||
if (TARGET_ALIGN_DOUBLE)
|
||||
@ -5362,18 +5550,22 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
|
||||
frame->to_allocate -= frame->red_zone_size;
|
||||
frame->stack_pointer_offset -= frame->red_zone_size;
|
||||
#if 0
|
||||
fprintf (stderr, "nregs: %i\n", frame->nregs);
|
||||
fprintf (stderr, "size: %i\n", size);
|
||||
fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
|
||||
fprintf (stderr, "padding1: %i\n", frame->padding1);
|
||||
fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
|
||||
fprintf (stderr, "padding2: %i\n", frame->padding2);
|
||||
fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
|
||||
fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
|
||||
fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
|
||||
fprintf (stderr, "hard_frame_pointer_offset: %i\n",
|
||||
frame->hard_frame_pointer_offset);
|
||||
fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
|
||||
fprintf (stderr, "\n");
|
||||
fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
|
||||
fprintf (stderr, "size: %ld\n", (long)size);
|
||||
fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
|
||||
fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
|
||||
fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
|
||||
fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
|
||||
fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
|
||||
fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
|
||||
fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
|
||||
fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
|
||||
(long)frame->hard_frame_pointer_offset);
|
||||
fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
|
||||
fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
|
||||
fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
|
||||
fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -7664,7 +7856,8 @@ print_reg (rtx x, int code, FILE *file)
|
||||
gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
|
||||
&& REGNO (x) != FRAME_POINTER_REGNUM
|
||||
&& REGNO (x) != FLAGS_REG
|
||||
&& REGNO (x) != FPSR_REG);
|
||||
&& REGNO (x) != FPSR_REG
|
||||
&& REGNO (x) != FPCR_REG);
|
||||
|
||||
if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
|
||||
putc ('%', file);
|
||||
@ -8859,17 +9052,15 @@ output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
|
||||
#if HAVE_AS_IX86_FFREEP
|
||||
return opno ? "ffreep\t%y1" : "ffreep\t%y0";
|
||||
#else
|
||||
switch (REGNO (operands[opno]))
|
||||
{
|
||||
case FIRST_STACK_REG + 0: return ".word\t0xc0df";
|
||||
case FIRST_STACK_REG + 1: return ".word\t0xc1df";
|
||||
case FIRST_STACK_REG + 2: return ".word\t0xc2df";
|
||||
case FIRST_STACK_REG + 3: return ".word\t0xc3df";
|
||||
case FIRST_STACK_REG + 4: return ".word\t0xc4df";
|
||||
case FIRST_STACK_REG + 5: return ".word\t0xc5df";
|
||||
case FIRST_STACK_REG + 6: return ".word\t0xc6df";
|
||||
case FIRST_STACK_REG + 7: return ".word\t0xc7df";
|
||||
}
|
||||
{
|
||||
static char retval[] = ".word\t0xc_df";
|
||||
int regno = REGNO (operands[opno]);
|
||||
|
||||
gcc_assert (FP_REGNO_P (regno));
|
||||
|
||||
retval[9] = '0' + (regno - FIRST_STACK_REG);
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
return opno ? "fstp\t%y1" : "fstp\t%y0";
|
||||
@ -9247,8 +9438,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
}
|
||||
|
||||
if (TARGET_SSE2 && mode == V2DFmode)
|
||||
{
|
||||
rtx zero;
|
||||
{
|
||||
rtx zero;
|
||||
|
||||
if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V2DFmode, op0);
|
||||
op1 = gen_lowpart (V2DFmode, op1);
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
/* When SSE registers are split into halves, we can avoid
|
||||
writing to the top half twice. */
|
||||
@ -9276,7 +9475,15 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||
emit_insn (gen_sse2_loadhpd (op0, op0, m));
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
|
||||
emit_move_insn (op0, CONST0_RTX (mode));
|
||||
else
|
||||
@ -13833,6 +14040,7 @@ ix86_issue_rate (void)
|
||||
case PROCESSOR_PENTIUM4:
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_AMDFAM10:
|
||||
case PROCESSOR_NOCONA:
|
||||
case PROCESSOR_GENERIC32:
|
||||
case PROCESSOR_GENERIC64:
|
||||
@ -14031,6 +14239,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
||||
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_AMDFAM10:
|
||||
case PROCESSOR_GENERIC32:
|
||||
case PROCESSOR_GENERIC64:
|
||||
memory = get_attr_memory (insn);
|
||||
@ -14744,6 +14953,14 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_PABSW128,
|
||||
IX86_BUILTIN_PABSD128,
|
||||
|
||||
/* AMDFAM10 - SSE4A New Instructions. */
|
||||
IX86_BUILTIN_MOVNTSD,
|
||||
IX86_BUILTIN_MOVNTSS,
|
||||
IX86_BUILTIN_EXTRQI,
|
||||
IX86_BUILTIN_EXTRQ,
|
||||
IX86_BUILTIN_INSERTQI,
|
||||
IX86_BUILTIN_INSERTQ,
|
||||
|
||||
IX86_BUILTIN_VEC_INIT_V2SI,
|
||||
IX86_BUILTIN_VEC_INIT_V4HI,
|
||||
IX86_BUILTIN_VEC_INIT_V8QI,
|
||||
@ -15468,6 +15685,18 @@ ix86_init_mmx_sse_builtins (void)
|
||||
= build_function_type_list (void_type_node,
|
||||
pchar_type_node, V16QI_type_node, NULL_TREE);
|
||||
|
||||
tree v2di_ftype_v2di_unsigned_unsigned
|
||||
= build_function_type_list (V2DI_type_node, V2DI_type_node,
|
||||
unsigned_type_node, unsigned_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_v2di_v2di_unsigned_unsigned
|
||||
= build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
|
||||
unsigned_type_node, unsigned_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_v2di_v16qi
|
||||
= build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
|
||||
tree float80_type;
|
||||
tree float128_type;
|
||||
tree ftype;
|
||||
@ -15804,6 +16033,20 @@
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
IX86_BUILTIN_PALIGNR);

/* AMDFAM10 SSE4A New built-ins */
def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
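The built-ins registered above become directly callable from C once -msse4a (or -march=amdfam10/barcelona) is in effect. A hypothetical usage sketch; the vector typedefs and the (value, length, index) immediate order are illustrative assumptions layered on the v2di/v2df function types declared earlier:

/* Hypothetical example, not part of the patch: exercise two of the new
   SSE4A built-ins.  Compile with -msse4a.  */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef double __v2df __attribute__ ((__vector_size__ (16)));

static __v2di
extract_bits (__v2di x)
{
  /* Both immediates must be compile-time constants.  */
  return __builtin_ia32_extrqi (x, 16, 8);
}

static void
stream_low_double (double *dst, __v2df x)
{
  /* Non-temporal store of the low element (movntsd).  */
  __builtin_ia32_movntsd (dst, x);
}

Released compilers wrap the same operations in _mm_* intrinsics (the SSE4A header is assumed to be ammintrin.h); the raw built-ins are used here only to tie the example to the table above.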
@ -16300,9 +16543,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
enum insn_code icode;
|
||||
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
|
||||
tree arglist = TREE_OPERAND (exp, 1);
|
||||
tree arg0, arg1, arg2;
|
||||
rtx op0, op1, op2, pat;
|
||||
enum machine_mode tmode, mode0, mode1, mode2, mode3;
|
||||
tree arg0, arg1, arg2, arg3;
|
||||
rtx op0, op1, op2, op3, pat;
|
||||
enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
|
||||
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
|
||||
|
||||
switch (fcode)
|
||||
@ -16818,6 +17061,114 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_MOVNTSD:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
|
||||
|
||||
case IX86_BUILTIN_MOVNTSS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
|
||||
|
||||
case IX86_BUILTIN_INSERTQ:
|
||||
case IX86_BUILTIN_EXTRQ:
|
||||
icode = (fcode == IX86_BUILTIN_EXTRQ
|
||||
? CODE_FOR_sse4a_extrq
|
||||
: CODE_FOR_sse4a_insertq);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
tmode = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
|
||||
op0 = copy_to_mode_reg (mode1, op0);
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
|
||||
op1 = copy_to_mode_reg (mode2, op1);
|
||||
if (optimize || target == 0
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
pat = GEN_FCN (icode) (target, op0, op1);
|
||||
if (! pat)
|
||||
return NULL_RTX;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_EXTRQI:
|
||||
icode = CODE_FOR_sse4a_extrqi;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
tmode = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
mode3 = insn_data[icode].operand[3].mode;
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
|
||||
op0 = copy_to_mode_reg (mode1, op0);
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
|
||||
{
|
||||
error ("index mask must be an immediate");
|
||||
return gen_reg_rtx (tmode);
|
||||
}
|
||||
if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
|
||||
{
|
||||
error ("length mask must be an immediate");
|
||||
return gen_reg_rtx (tmode);
|
||||
}
|
||||
if (optimize || target == 0
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
pat = GEN_FCN (icode) (target, op0, op1, op2);
|
||||
if (! pat)
|
||||
return NULL_RTX;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_INSERTQI:
|
||||
icode = CODE_FOR_sse4a_insertqi;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
|
||||
arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
op3 = expand_normal (arg3);
|
||||
tmode = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
mode3 = insn_data[icode].operand[3].mode;
|
||||
mode4 = insn_data[icode].operand[4].mode;
|
||||
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
|
||||
op0 = copy_to_mode_reg (mode1, op0);
|
||||
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
|
||||
op1 = copy_to_mode_reg (mode2, op1);
|
||||
|
||||
if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
|
||||
{
|
||||
error ("index mask must be an immediate");
|
||||
return gen_reg_rtx (tmode);
|
||||
}
|
||||
if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
|
||||
{
|
||||
error ("length mask must be an immediate");
|
||||
return gen_reg_rtx (tmode);
|
||||
}
|
||||
if (optimize || target == 0
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
|
||||
if (! pat)
|
||||
return NULL_RTX;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_VEC_INIT_V2SI:
|
||||
case IX86_BUILTIN_VEC_INIT_V4HI:
|
||||
case IX86_BUILTIN_VEC_INIT_V8QI:
|
||||
|
@ -141,6 +141,7 @@ extern const struct processor_costs *ix86_cost;
|
||||
#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
|
||||
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
|
||||
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
|
||||
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
|
||||
|
||||
#define TUNEMASK (1 << ix86_tune)
|
||||
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
|
||||
@ -159,15 +160,16 @@ extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
|
||||
extern const int x86_epilogue_using_move, x86_decompose_lea;
|
||||
extern const int x86_arch_always_fancy_math_387, x86_shift1;
|
||||
extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
|
||||
extern const int x86_sse_unaligned_move_optimal;
|
||||
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
|
||||
extern const int x86_use_ffreep;
|
||||
extern const int x86_inter_unit_moves, x86_schedule;
|
||||
extern const int x86_use_bt;
|
||||
extern const int x86_cmpxchg, x86_cmpxchg8b, x86_cmpxchg16b, x86_xadd;
|
||||
extern const int x86_cmpxchg, x86_cmpxchg8b, x86_xadd;
|
||||
extern const int x86_use_incdec;
|
||||
extern const int x86_pad_returns;
|
||||
extern const int x86_partial_flag_reg_stall;
|
||||
extern int x86_prefetch_sse;
|
||||
extern int x86_prefetch_sse, x86_cmpxchg16b;
|
||||
|
||||
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
|
||||
#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
|
||||
@ -207,6 +209,8 @@ extern int x86_prefetch_sse;
|
||||
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
|
||||
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
|
||||
(x86_sse_partial_reg_dependency & TUNEMASK)
|
||||
#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
|
||||
(x86_sse_unaligned_move_optimal & TUNEMASK)
|
||||
#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
|
||||
#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
|
||||
#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
|
||||
@ -237,7 +241,7 @@ extern int x86_prefetch_sse;
|
||||
|
||||
#define TARGET_CMPXCHG (x86_cmpxchg & (1 << ix86_arch))
|
||||
#define TARGET_CMPXCHG8B (x86_cmpxchg8b & (1 << ix86_arch))
|
||||
#define TARGET_CMPXCHG16B (x86_cmpxchg16b & (1 << ix86_arch))
|
||||
#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
|
||||
#define TARGET_XADD (x86_xadd & (1 << ix86_arch))
|
||||
|
||||
#ifndef TARGET_64BIT_DEFAULT
|
||||
@ -399,6 +403,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
} \
|
||||
else if (TARGET_K8) \
|
||||
builtin_define ("__tune_k8__"); \
|
||||
else if (TARGET_AMDFAM10) \
|
||||
builtin_define ("__tune_amdfam10__"); \
|
||||
else if (TARGET_PENTIUM4) \
|
||||
builtin_define ("__tune_pentium4__"); \
|
||||
else if (TARGET_NOCONA) \
|
||||
@ -420,6 +426,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
builtin_define ("__SSE3__"); \
if (TARGET_SSSE3) \
builtin_define ("__SSSE3__"); \
if (TARGET_SSE4A) \
builtin_define ("__SSE4A__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \
@ -475,6 +483,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
builtin_define ("__k8"); \
builtin_define ("__k8__"); \
} \
else if (ix86_arch == PROCESSOR_AMDFAM10) \
{ \
builtin_define ("__amdfam10"); \
builtin_define ("__amdfam10__"); \
} \
else if (ix86_arch == PROCESSOR_PENTIUM4) \
{ \
builtin_define ("__pentium4"); \
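Together with the amdfam10/barcelona entries in processor_alias_table, these predefines let sources test for the new target at preprocessing time. A minimal, illustrative check (not from the commit):

/* Illustrative only: react to the macros defined for -march=amdfam10
   or -march=barcelona.  */
const char *
describe_target (void)
{
#if defined (__amdfam10__) && defined (__SSE4A__)
  return "AMD family 10h, SSE4A available";
#elif defined (__k8__)
  return "AMD K8";
#else
  return "other x86";
#endif
}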
@ -513,13 +526,14 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
#define TARGET_CPU_DEFAULT_nocona 17
|
||||
#define TARGET_CPU_DEFAULT_core2 18
|
||||
#define TARGET_CPU_DEFAULT_generic 19
|
||||
#define TARGET_CPU_DEFAULT_amdfam10 20
|
||||
|
||||
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
|
||||
"pentiumpro", "pentium2", "pentium3", \
|
||||
"pentium4", "geode", "k6", "k6-2", "k6-3", \
|
||||
"athlon", "athlon-4", "k8", \
|
||||
"pentium-m", "prescott", "nocona", \
|
||||
"core2", "generic"}
|
||||
"core2", "generic", "amdfam10"}
|
||||
|
||||
#ifndef CC1_SPEC
|
||||
#define CC1_SPEC "%(cc1_cpu) "
|
||||
@ -734,7 +748,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
eliminated during reloading in favor of either the stack or frame
|
||||
pointer. */
|
||||
|
||||
#define FIRST_PSEUDO_REGISTER 53
|
||||
#define FIRST_PSEUDO_REGISTER 54
|
||||
|
||||
/* Number of hardware registers that go into the DWARF-2 unwind info.
|
||||
If not defined, equals FIRST_PSEUDO_REGISTER. */
|
||||
@ -754,8 +768,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
#define FIXED_REGISTERS \
|
||||
/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/*arg,flags,fpsr,dir,frame*/ \
|
||||
1, 1, 1, 1, 1, \
|
||||
/*arg,flags,fpsr,fpcr,dir,frame*/ \
|
||||
1, 1, 1, 1, 1, 1, \
|
||||
/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
|
||||
@ -782,10 +796,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
#define CALL_USED_REGISTERS \
|
||||
/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
|
||||
{ 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/*arg,flags,fpsr,dir,frame*/ \
|
||||
1, 1, 1, 1, 1, \
|
||||
/*arg,flags,fpsr,fpcr,dir,frame*/ \
|
||||
1, 1, 1, 1, 1, 1, \
|
||||
/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
|
||||
@ -806,11 +820,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
|
||||
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
|
||||
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
|
||||
48, 49, 50, 51, 52 }
|
||||
48, 49, 50, 51, 52, 53 }
|
||||
|
||||
/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
|
||||
to be rearranged based on a particular function. When using sse math,
|
||||
we want to allocate SSE before x87 registers and vice vera. */
|
||||
we want to allocate SSE before x87 registers and vice versa. */
|
||||
|
||||
#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
|
||||
|
||||
@ -972,7 +986,7 @@ do { \
|
||||
#define HARD_FRAME_POINTER_REGNUM 6
|
||||
|
||||
/* Base register for access to local variables of the function. */
|
||||
#define FRAME_POINTER_REGNUM 20
|
||||
#define FRAME_POINTER_REGNUM 21
|
||||
|
||||
/* First floating point reg */
|
||||
#define FIRST_FLOAT_REG 8
|
||||
@ -1085,7 +1099,7 @@ do { \
|
||||
opcode needs reg %ebx. But some systems pass args to the OS in ebx,
|
||||
and the "b" register constraint is useful in asms for syscalls.
|
||||
|
||||
The flags and fpsr registers are in no class. */
|
||||
The flags, fpsr and fpcr registers are in no class. */
|
||||
|
||||
enum reg_class
|
||||
{
|
||||
@ -1166,21 +1180,21 @@ enum reg_class
|
||||
{ 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \
|
||||
{ 0x03, 0x0 }, /* AD_REGS */ \
|
||||
{ 0x0f, 0x0 }, /* Q_REGS */ \
|
||||
{ 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \
|
||||
{ 0x7f, 0x1fe0 }, /* INDEX_REGS */ \
|
||||
{ 0x1100ff, 0x0 }, /* LEGACY_REGS */ \
|
||||
{ 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
|
||||
{ 0x2100f0, 0x3fc0 }, /* NON_Q_REGS */ \
|
||||
{ 0x7f, 0x3fc0 }, /* INDEX_REGS */ \
|
||||
{ 0x2100ff, 0x0 }, /* LEGACY_REGS */ \
|
||||
{ 0x2100ff, 0x3fc0 }, /* GENERAL_REGS */ \
|
||||
{ 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
|
||||
{ 0xff00, 0x0 }, /* FLOAT_REGS */ \
|
||||
{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \
|
||||
{ 0xe0000000, 0x1f }, /* MMX_REGS */ \
|
||||
{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \
|
||||
{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \
|
||||
{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \
|
||||
{ 0x1ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \
|
||||
{ 0x1fe100ff,0x1fffe0 }, /* INT_SSE_REGS */ \
|
||||
{ 0x1fe1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \
|
||||
{ 0xffffffff,0x1fffff } \
|
||||
{ 0x3fc00000,0x3fc000 }, /* SSE_REGS */ \
|
||||
{ 0xc0000000, 0x3f }, /* MMX_REGS */ \
|
||||
{ 0x3fc00100,0x3fc000 }, /* FP_TOP_SSE_REG */ \
|
||||
{ 0x3fc00200,0x3fc000 }, /* FP_SECOND_SSE_REG */ \
|
||||
{ 0x3fc0ff00,0x3fc000 }, /* FLOAT_SSE_REGS */ \
|
||||
{ 0x1ffff, 0x3fc0 }, /* FLOAT_INT_REGS */ \
|
||||
{ 0x3fc100ff,0x3fffc0 }, /* INT_SSE_REGS */ \
|
||||
{ 0x3fc1ffff,0x3fffc0 }, /* FLOAT_INT_SSE_REGS */ \
|
||||
{ 0xffffffff,0x3fffff } \
|
||||
}
|
||||
|
||||
/* The same information, inverted:
|
||||
@ -1196,11 +1210,10 @@ enum reg_class
|
||||
|
||||
#define SMALL_REGISTER_CLASSES 1
|
||||
|
||||
#define QI_REG_P(X) \
|
||||
(REG_P (X) && REGNO (X) < 4)
|
||||
#define QI_REG_P(X) (REG_P (X) && REGNO (X) < 4)
|
||||
|
||||
#define GENERAL_REGNO_P(N) \
|
||||
((N) < 8 || REX_INT_REGNO_P (N))
|
||||
((N) <= STACK_POINTER_REGNUM || REX_INT_REGNO_P (N))
|
||||
|
||||
#define GENERAL_REG_P(X) \
|
||||
(REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
|
||||
@ -1208,39 +1221,38 @@ enum reg_class
|
||||
#define ANY_QI_REG_P(X) (TARGET_64BIT ? GENERAL_REG_P(X) : QI_REG_P (X))
|
||||
|
||||
#define NON_QI_REG_P(X) \
|
||||
(REG_P (X) && REGNO (X) >= 4 && REGNO (X) < FIRST_PSEUDO_REGISTER)
|
||||
(REG_P (X) && IN_RANGE (REGNO (X), 4, FIRST_PSEUDO_REGISTER - 1))
|
||||
|
||||
#define REX_INT_REGNO_P(N) ((N) >= FIRST_REX_INT_REG && (N) <= LAST_REX_INT_REG)
|
||||
#define REX_INT_REGNO_P(N) \
|
||||
IN_RANGE ((N), FIRST_REX_INT_REG, LAST_REX_INT_REG)
|
||||
#define REX_INT_REG_P(X) (REG_P (X) && REX_INT_REGNO_P (REGNO (X)))
|
||||
|
||||
#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
|
||||
#define FP_REGNO_P(N) ((N) >= FIRST_STACK_REG && (N) <= LAST_STACK_REG)
|
||||
#define FP_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
|
||||
#define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X)))
|
||||
#define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N))
|
||||
|
||||
#define SSE_REGNO_P(N) \
|
||||
(((N) >= FIRST_SSE_REG && (N) <= LAST_SSE_REG) \
|
||||
|| ((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG))
|
||||
#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
|
||||
#define SSE_REGNO_P(N) \
|
||||
(IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \
|
||||
|| REX_SSE_REGNO_P (N))
|
||||
|
||||
#define REX_SSE_REGNO_P(N) \
|
||||
((N) >= FIRST_REX_SSE_REG && (N) <= LAST_REX_SSE_REG)
|
||||
IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG)
|
||||
|
||||
#define SSE_REGNO(N) \
|
||||
((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
|
||||
#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
|
||||
|
||||
#define SSE_FLOAT_MODE_P(MODE) \
|
||||
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
|
||||
|
||||
#define MMX_REGNO_P(N) ((N) >= FIRST_MMX_REG && (N) <= LAST_MMX_REG)
|
||||
#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
|
||||
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
|
||||
|
||||
#define STACK_REG_P(XOP) \
|
||||
(REG_P (XOP) && \
|
||||
REGNO (XOP) >= FIRST_STACK_REG && \
|
||||
REGNO (XOP) <= LAST_STACK_REG)
|
||||
|
||||
#define NON_STACK_REG_P(XOP) (REG_P (XOP) && ! STACK_REG_P (XOP))
|
||||
#define STACK_REG_P(XOP) (REG_P (XOP) && STACK_REGNO_P (REGNO (XOP)))
|
||||
#define NON_STACK_REG_P(XOP) \
|
||||
(REG_P (XOP) && ! STACK_REGNO_P (REGNO (XOP)))
|
||||
#define STACK_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
|
||||
|
||||
#define STACK_TOP_P(XOP) (REG_P (XOP) && REGNO (XOP) == FIRST_STACK_REG)
|
||||
|
||||
@ -1588,21 +1600,15 @@ typedef struct ix86_args {
|
||||
|
||||
#define REGNO_OK_FOR_INDEX_P(REGNO) \
|
||||
((REGNO) < STACK_POINTER_REGNUM \
|
||||
|| (REGNO >= FIRST_REX_INT_REG \
|
||||
&& (REGNO) <= LAST_REX_INT_REG) \
|
||||
|| ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \
|
||||
&& (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \
|
||||
|| (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM)
|
||||
|| REX_INT_REGNO_P (REGNO) \
|
||||
|| (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM \
|
||||
|| REX_INT_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
|
||||
|
||||
#define REGNO_OK_FOR_BASE_P(REGNO) \
|
||||
((REGNO) <= STACK_POINTER_REGNUM \
|
||||
(GENERAL_REGNO_P (REGNO) \
|
||||
|| (REGNO) == ARG_POINTER_REGNUM \
|
||||
|| (REGNO) == FRAME_POINTER_REGNUM \
|
||||
|| (REGNO >= FIRST_REX_INT_REG \
|
||||
&& (REGNO) <= LAST_REX_INT_REG) \
|
||||
|| ((unsigned) reg_renumber[(REGNO)] >= FIRST_REX_INT_REG \
|
||||
&& (unsigned) reg_renumber[(REGNO)] <= LAST_REX_INT_REG) \
|
||||
|| (unsigned) reg_renumber[(REGNO)] <= STACK_POINTER_REGNUM)
|
||||
|| GENERAL_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
|
||||
|
||||
#define REGNO_OK_FOR_SIREG_P(REGNO) \
|
||||
((REGNO) == 4 || reg_renumber[(REGNO)] == 4)
|
||||
@ -1626,16 +1632,13 @@ typedef struct ix86_args {
|
||||
/* Non strict versions, pseudos are ok. */
|
||||
#define REG_OK_FOR_INDEX_NONSTRICT_P(X) \
|
||||
(REGNO (X) < STACK_POINTER_REGNUM \
|
||||
|| (REGNO (X) >= FIRST_REX_INT_REG \
|
||||
&& REGNO (X) <= LAST_REX_INT_REG) \
|
||||
|| REX_INT_REGNO_P (REGNO (X)) \
|
||||
|| REGNO (X) >= FIRST_PSEUDO_REGISTER)
|
||||
|
||||
#define REG_OK_FOR_BASE_NONSTRICT_P(X) \
|
||||
(REGNO (X) <= STACK_POINTER_REGNUM \
|
||||
(GENERAL_REGNO_P (REGNO (X)) \
|
||||
|| REGNO (X) == ARG_POINTER_REGNUM \
|
||||
|| REGNO (X) == FRAME_POINTER_REGNUM \
|
||||
|| (REGNO (X) >= FIRST_REX_INT_REG \
|
||||
&& REGNO (X) <= LAST_REX_INT_REG) \
|
||||
|| REGNO (X) >= FIRST_PSEUDO_REGISTER)
|
||||
|
||||
/* Strict versions, hard registers only */
|
||||
@ -1940,9 +1943,9 @@ do { \
|
||||
#define HI_REGISTER_NAMES \
|
||||
{"ax","dx","cx","bx","si","di","bp","sp", \
|
||||
"st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", \
|
||||
"argp", "flags", "fpsr", "dirflag", "frame", \
|
||||
"argp", "flags", "fpsr", "fpcr", "dirflag", "frame", \
|
||||
"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" , \
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
|
||||
|
||||
@ -2108,6 +2111,7 @@ enum processor_type
|
||||
PROCESSOR_CORE2,
|
||||
PROCESSOR_GENERIC32,
|
||||
PROCESSOR_GENERIC64,
|
||||
PROCESSOR_AMDFAM10,
|
||||
PROCESSOR_max
|
||||
};
|
||||
|
||||
@ -2247,7 +2251,7 @@ enum ix86_stack_slot
|
||||
??? Maybe Pentium chips benefits from renaming, someone can try.... */
|
||||
|
||||
#define HARD_REGNO_RENAME_OK(SRC, TARGET) \
|
||||
((SRC) < FIRST_STACK_REG || (SRC) > LAST_STACK_REG)
|
||||
(! IN_RANGE ((SRC), FIRST_STACK_REG, LAST_STACK_REG))
|
||||
|
||||
|
||||
#define DLL_IMPORT_EXPORT_PREFIX '#'
|
||||
|
@ -104,7 +104,7 @@
|
||||
(UNSPEC_MFENCE 44)
|
||||
(UNSPEC_LFENCE 45)
|
||||
(UNSPEC_PSADBW 46)
|
||||
(UNSPEC_LDQQU 47)
|
||||
(UNSPEC_LDDQU 47)
|
||||
|
||||
; Generic math support
|
||||
(UNSPEC_COPYSIGN 50)
|
||||
@ -153,6 +153,12 @@
|
||||
(UNSPEC_PSHUFB 120)
|
||||
(UNSPEC_PSIGN 121)
|
||||
(UNSPEC_PALIGNR 122)
|
||||
|
||||
; For SSE4A support
|
||||
(UNSPEC_EXTRQI 130)
|
||||
(UNSPEC_EXTRQ 131)
|
||||
(UNSPEC_INSERTQI 132)
|
||||
(UNSPEC_INSERTQ 133)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
@ -178,7 +184,9 @@
|
||||
(SP_REG 7)
|
||||
(FLAGS_REG 17)
|
||||
(FPSR_REG 18)
|
||||
(DIRFLAG_REG 19)
|
||||
(FPCR_REG 19)
|
||||
(DIRFLAG_REG 20)
|
||||
(R11_REG 41)
|
||||
])
|
||||
|
||||
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
|
||||
@ -192,7 +200,8 @@
|
||||
|
||||
;; Processor type. This attribute must exactly match the processor_type
|
||||
;; enumeration in i386.h.
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
|
||||
(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
|
||||
nocona,core2,generic32,generic64,amdfam10"
|
||||
(const (symbol_ref "ix86_tune")))
|
||||
|
||||
;; A basic instruction type. Refinements due to arguments to be
|
||||
@ -203,10 +212,10 @@
|
||||
incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
|
||||
icmp,test,ibr,setcc,icmov,
|
||||
push,pop,call,callv,leave,
|
||||
str,cld,
|
||||
str,,bitmanip,cld,
|
||||
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
|
||||
sselog,sselog1,sseiadd,sseishft,sseimul,
|
||||
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
|
||||
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
|
||||
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
|
||||
(const_string "other"))
|
||||
|
||||
@ -220,7 +229,7 @@
|
||||
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
|
||||
(const_string "i387")
|
||||
(eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
|
||||
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
|
||||
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
|
||||
(const_string "sse")
|
||||
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
|
||||
(const_string "mmx")
|
||||
@ -230,7 +239,8 @@
|
||||
|
||||
;; The (bounding maximum) length of an instruction immediate.
|
||||
(define_attr "length_immediate" ""
|
||||
(cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
|
||||
(cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave,
|
||||
bitmanip")
|
||||
(const_int 0)
|
||||
(eq_attr "unit" "i387,sse,mmx")
|
||||
(const_int 0)
|
||||
@ -284,7 +294,7 @@
|
||||
;; Set when 0f opcode prefix is used.
|
||||
(define_attr "prefix_0f" ""
|
||||
(if_then_else
|
||||
(ior (eq_attr "type" "imovx,setcc,icmov")
|
||||
(ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
|
||||
(eq_attr "unit" "sse,mmx"))
|
||||
(const_int 1)
|
||||
(const_int 0)))
|
||||
@ -413,7 +423,7 @@
|
||||
(const_string "load")
|
||||
(and (eq_attr "type"
|
||||
"!alu1,negnot,ishift1,
|
||||
imov,imovx,icmp,test,
|
||||
imov,imovx,icmp,test,bitmanip,
|
||||
fmov,fcmp,fsgn,
|
||||
sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
|
||||
mmx,mmxmov,mmxcmp,mmxcvt")
|
||||
@ -968,10 +978,11 @@
|
||||
"sahf"
|
||||
[(set_attr "length" "1")
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; Pentium Pro can do steps 1 through 3 in one go.
|
||||
|
||||
;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
|
||||
(define_insn "*cmpfp_i_mixed"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
(compare:CCFP (match_operand 0 "register_operand" "f,x")
|
||||
@ -985,7 +996,8 @@
|
||||
(if_then_else (match_operand:SF 1 "" "")
|
||||
(const_string "SF")
|
||||
(const_string "DF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*cmpfp_i_sse"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
@ -1000,7 +1012,8 @@
|
||||
(if_then_else (match_operand:SF 1 "" "")
|
||||
(const_string "SF")
|
||||
(const_string "DF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*cmpfp_i_i387"
|
||||
[(set (reg:CCFP FLAGS_REG)
|
||||
@ -1019,7 +1032,8 @@
|
||||
(const_string "DF")
|
||||
]
|
||||
(const_string "XF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*cmpfp_iu_mixed"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1034,7 +1048,8 @@
|
||||
(if_then_else (match_operand:SF 1 "" "")
|
||||
(const_string "SF")
|
||||
(const_string "DF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*cmpfp_iu_sse"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1049,7 +1064,8 @@
|
||||
(if_then_else (match_operand:SF 1 "" "")
|
||||
(const_string "SF")
|
||||
(const_string "DF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*cmpfp_iu_387"
|
||||
[(set (reg:CCFPU FLAGS_REG)
|
||||
@ -1068,7 +1084,8 @@
|
||||
(const_string "DF")
|
||||
]
|
||||
(const_string "XF")))
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
;; Move instructions.
|
||||
|
||||
@ -1274,7 +1291,8 @@
|
||||
[(set_attr "type" "imov")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
(define_expand "movhi"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "")
|
||||
@ -1391,8 +1409,10 @@
|
||||
[(set_attr "type" "imov")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
|
||||
(define_insn "*swaphi_2"
|
||||
[(set (match_operand:HI 0 "register_operand" "+r")
|
||||
(match_operand:HI 1 "register_operand" "+r"))
|
||||
@ -1565,8 +1585,10 @@
|
||||
[(set_attr "type" "imov")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
|
||||
;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
|
||||
(define_insn "*swapqi_2"
|
||||
[(set (match_operand:QI 0 "register_operand" "+q")
|
||||
(match_operand:QI 1 "register_operand" "+q"))
|
||||
@ -2120,7 +2142,8 @@
|
||||
[(set_attr "type" "imov")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
(define_expand "movti"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "")
|
||||
@ -4150,7 +4173,8 @@
|
||||
"cvttss2si{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
(define_insn "fix_truncdfdi_sse"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
@ -4159,7 +4183,8 @@
|
||||
"cvttsd2si{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
(define_insn "fix_truncsfsi_sse"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
@ -4168,7 +4193,8 @@
|
||||
"cvttss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
(define_insn "fix_truncdfsi_sse"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
@ -4177,7 +4203,8 @@
|
||||
"cvttsd2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
;; Avoid vector decoded forms of the instruction.
|
||||
(define_peephole2
|
||||
@ -4423,7 +4450,7 @@
|
||||
|
||||
(define_insn "x86_fnstcw_1"
|
||||
[(set (match_operand:HI 0 "memory_operand" "=m")
|
||||
(unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))]
|
||||
(unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
|
||||
"TARGET_80387"
|
||||
"fnstcw\t%0"
|
||||
[(set_attr "length" "2")
|
||||
@ -4431,14 +4458,15 @@
|
||||
(set_attr "unit" "i387")])
|
||||
|
||||
(define_insn "x86_fldcw_1"
|
||||
[(set (reg:HI FPSR_REG)
|
||||
[(set (reg:HI FPCR_REG)
|
||||
(unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
|
||||
"TARGET_80387"
|
||||
"fldcw\t%0"
|
||||
[(set_attr "length" "2")
|
||||
(set_attr "mode" "HI")
|
||||
(set_attr "unit" "i387")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
|
||||
;; Conversion between fixed point and floating point.
|
||||
|
||||
@ -4489,6 +4517,7 @@
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "unit" "*,i387,*,*")
|
||||
(set_attr "athlon_decode" "*,*,vector,double")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsisf2_sse"
|
||||
@ -4499,6 +4528,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsisf2_i387"
|
||||
@ -4532,6 +4562,7 @@
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "unit" "*,i387,*,*")
|
||||
(set_attr "athlon_decode" "*,*,vector,double")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatdisf2_sse"
|
||||
@ -4542,6 +4573,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatdisf2_i387"
|
||||
@ -4600,6 +4632,7 @@
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "unit" "*,i387,*,*")
|
||||
(set_attr "athlon_decode" "*,*,double,direct")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsidf2_sse"
|
||||
@ -4610,6 +4643,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatsidf2_i387"
|
||||
@ -4643,6 +4677,7 @@
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "unit" "*,i387,*,*")
|
||||
(set_attr "athlon_decode" "*,*,double,direct")
|
||||
(set_attr "amdfam10_decode" "*,*,vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatdidf2_sse"
|
||||
@ -4653,6 +4688,7 @@
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
(define_insn "*floatdidf2_i387"
|
||||
@ -6860,6 +6896,14 @@
|
||||
"TARGET_64BIT"
|
||||
"")
|
||||
|
||||
;; On AMDFAM10
|
||||
;; IMUL reg64, reg64, imm8 Direct
|
||||
;; IMUL reg64, mem64, imm8 VectorPath
|
||||
;; IMUL reg64, reg64, imm32 Direct
|
||||
;; IMUL reg64, mem64, imm32 VectorPath
|
||||
;; IMUL reg64, reg64 Direct
|
||||
;; IMUL reg64, mem64 Direct
|
||||
|
||||
(define_insn "*muldi3_1_rex64"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
|
||||
(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
|
||||
@ -6882,6 +6926,11 @@
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set (attr "amdfam10_decode")
|
||||
(cond [(and (eq_attr "alternative" "0,1")
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "mulsi3"
|
||||
@ -6892,6 +6941,14 @@
|
||||
""
|
||||
"")
|
||||
|
||||
;; On AMDFAM10
|
||||
;; IMUL reg32, reg32, imm8 Direct
|
||||
;; IMUL reg32, mem32, imm8 VectorPath
|
||||
;; IMUL reg32, reg32, imm32 Direct
|
||||
;; IMUL reg32, mem32, imm32 VectorPath
|
||||
;; IMUL reg32, reg32 Direct
|
||||
;; IMUL reg32, mem32 Direct
|
||||
|
||||
(define_insn "*mulsi3_1"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r,r")
|
||||
(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
|
||||
@ -6913,6 +6970,11 @@
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set (attr "amdfam10_decode")
|
||||
(cond [(and (eq_attr "alternative" "0,1")
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*mulsi3_1_zext"
|
||||
@ -6938,6 +7000,11 @@
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set (attr "amdfam10_decode")
|
||||
(cond [(and (eq_attr "alternative" "0,1")
|
||||
(match_operand 1 "memory_operand" ""))
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "mulhi3"
|
||||
@ -6948,6 +7015,13 @@
|
||||
"TARGET_HIMODE_MATH"
|
||||
"")
|
||||
|
||||
;; On AMDFAM10
|
||||
;; IMUL reg16, reg16, imm8 VectorPath
|
||||
;; IMUL reg16, mem16, imm8 VectorPath
|
||||
;; IMUL reg16, reg16, imm16 VectorPath
|
||||
;; IMUL reg16, mem16, imm16 VectorPath
|
||||
;; IMUL reg16, reg16 Direct
|
||||
;; IMUL reg16, mem16 Direct
|
||||
(define_insn "*mulhi3_1"
|
||||
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
|
||||
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
|
||||
@ -6966,6 +7040,10 @@
|
||||
(eq_attr "alternative" "1,2")
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set (attr "amdfam10_decode")
|
||||
(cond [(eq_attr "alternative" "0,1")
|
||||
(const_string "vector")]
|
||||
(const_string "direct")))
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_expand "mulqi3"
|
||||
@ -6976,6 +7054,10 @@
|
||||
"TARGET_QIMODE_MATH"
|
||||
"")
|
||||
|
||||
;;On AMDFAM10
|
||||
;; MUL reg8 Direct
|
||||
;; MUL mem8 Direct
|
||||
|
||||
(define_insn "*mulqi3_1"
|
||||
[(set (match_operand:QI 0 "register_operand" "=a")
|
||||
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
|
||||
@ -6990,6 +7072,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "direct")))
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "umulqihi3"
|
||||
@ -7016,6 +7099,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "direct")))
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "mulqihi3"
|
||||
@ -7040,6 +7124,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "direct")))
|
||||
(set_attr "amdfam10_decode" "direct")
|
||||
(set_attr "mode" "QI")])
|
||||
|
||||
(define_expand "umulditi3"
|
||||
@ -7066,6 +7151,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
|
||||
@ -7093,6 +7179,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "mulditi3"
|
||||
@ -7119,6 +7206,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "mulsidi3"
|
||||
@ -7145,6 +7233,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "umuldi3_highpart"
|
||||
@ -7181,6 +7270,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "umulsi3_highpart"
|
||||
@ -7216,6 +7306,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*umulsi3_highpart_zext"
|
||||
@ -7238,6 +7329,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "smuldi3_highpart"
|
||||
@ -7273,6 +7365,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "smulsi3_highpart"
|
||||
@ -7307,6 +7400,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*smulsi3_highpart_zext"
|
||||
@ -7328,6 +7422,7 @@
|
||||
(if_then_else (eq_attr "cpu" "athlon")
|
||||
(const_string "vector")
|
||||
(const_string "double")))
|
||||
(set_attr "amdfam10_decode" "double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
;; The patterns that match these are at the end of this file.
|
||||
@ -10309,7 +10404,8 @@
|
||||
[(set_attr "type" "ishift")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
|
||||
(define_expand "x86_64_shift_adj"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
@ -10524,7 +10620,8 @@
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "pent_pair" "np")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
|
||||
(define_expand "x86_shift_adj_1"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
@ -11284,7 +11381,8 @@
|
||||
[(set_attr "type" "ishift")
|
||||
(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "vector")])
|
||||
(set_attr "athlon_decode" "vector")
|
||||
(set_attr "amdfam10_decode" "vector")])
|
||||
|
||||
(define_expand "ashrdi3"
|
||||
[(set (match_operand:DI 0 "shiftdi_operand" "")
|
||||
@ -14156,7 +14254,7 @@
|
||||
[(set_attr "type" "call")])
|
||||
|
||||
(define_insn "*sibcall_1_rex64_v"
|
||||
[(call (mem:QI (reg:DI 40))
|
||||
[(call (mem:QI (reg:DI R11_REG))
|
||||
(match_operand 0 "" ""))]
|
||||
"SIBLING_CALL_P (insn) && TARGET_64BIT"
|
||||
"jmp\t*%%r11"
|
||||
@ -14558,7 +14656,23 @@
|
||||
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
""
|
||||
"")
|
||||
{
|
||||
if (TARGET_ABM)
|
||||
{
|
||||
emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "clzsi2_abm"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_ABM"
|
||||
"lzcnt{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*bsr"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
@ -14567,7 +14681,44 @@
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
"bsr{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")])
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "popcountsi2"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_POPCNT"
|
||||
"popcnt{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*popcountsi2_cmp"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(popcount:SI (match_dup 1)))]
|
||||
"TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
"popcnt{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "*popcountsi2_cmp_zext"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(zero_extend:DI(popcount:SI (match_dup 1))))]
|
||||
"TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
"popcnt{l}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_expand "clzdi2"
|
||||
[(parallel
|
||||
@ -14579,7 +14730,23 @@
|
||||
[(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"TARGET_64BIT"
|
||||
"")
|
||||
{
|
||||
if (TARGET_ABM)
|
||||
{
|
||||
emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "clzdi2_abm"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_ABM"
|
||||
"lzcnt{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*bsr_rex64"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
@ -14588,7 +14755,92 @@
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT"
|
||||
"bsr{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")])
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "popcountdi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_64BIT && TARGET_POPCNT"
|
||||
"popcnt{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "*popcountdi2_cmp"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(popcount:DI (match_dup 1)))]
|
||||
"TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
"popcnt{q}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "clzhi2"
|
||||
[(parallel
|
||||
[(set (match_operand:HI 0 "register_operand" "")
|
||||
(minus:HI (const_int 15)
|
||||
(clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(parallel
|
||||
[(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
""
|
||||
{
|
||||
if (TARGET_ABM)
|
||||
{
|
||||
emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "clzhi2_abm"
|
||||
[(set (match_operand:HI 0 "register_operand" "=r")
|
||||
(clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_ABM"
|
||||
"lzcnt{w}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_insn "*bsrhi"
|
||||
[(set (match_operand:HI 0 "register_operand" "=r")
|
||||
(minus:HI (const_int 15)
|
||||
(clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
""
|
||||
"bsr{w}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_0f" "1")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_insn "popcounthi2"
|
||||
[(set (match_operand:HI 0 "register_operand" "=r")
|
||||
(popcount:HI (match_operand:HI 1 "nonimmediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_POPCNT"
|
||||
"popcnt{w}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
(define_insn "*popcounthi2_cmp"
|
||||
[(set (reg FLAGS_REG)
|
||||
(compare
|
||||
(popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
|
||||
(const_int 0)))
|
||||
(set (match_operand:HI 0 "register_operand" "=r")
|
||||
(popcount:HI (match_dup 1)))]
|
||||
"TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
|
||||
"popcnt{w}\t{%1, %0|%0, %1}"
|
||||
[(set_attr "prefix_rep" "1")
|
||||
(set_attr "type" "bitmanip")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
;; Thread-local storage patterns for ELF.
|
||||
;;
|
||||
@ -15503,7 +15755,8 @@
|
||||
"sqrtss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "SF")
|
||||
(set_attr "athlon_decode" "*")])
|
||||
(set_attr "athlon_decode" "*")
|
||||
(set_attr "amdfam10_decode" "*")])
|
||||
|
||||
(define_insn "*sqrtsf2_i387"
|
||||
[(set (match_operand:SF 0 "register_operand" "=f")
|
||||
@ -15541,7 +15794,8 @@
|
||||
"sqrtsd\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "*")])
|
||||
(set_attr "athlon_decode" "*")
|
||||
(set_attr "amdfam10_decode" "*")])
|
||||
|
||||
(define_insn "*sqrtdf2_i387"
|
||||
[(set (match_operand:DF 0 "register_operand" "=f")
|
||||
@ -15570,7 +15824,8 @@
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "mode" "XF")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "*sqrtextendsfxf2_i387"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -15590,7 +15845,8 @@
|
||||
"fsqrt"
|
||||
[(set_attr "type" "fpspc")
|
||||
(set_attr "mode" "XF")
|
||||
(set_attr "athlon_decode" "direct")])
|
||||
(set_attr "athlon_decode" "direct")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
(define_insn "fpremxf4"
|
||||
[(set (match_operand:XF 0 "register_operand" "=f")
|
||||
@ -20391,7 +20647,7 @@
|
||||
(mult:DI (match_operand:DI 1 "memory_operand" "")
|
||||
(match_operand:DI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
|
||||
&& !satisfies_constraint_K (operands[2])"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
(parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
|
||||
@ -20404,7 +20660,7 @@
|
||||
(mult:SI (match_operand:SI 1 "memory_operand" "")
|
||||
(match_operand:SI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
|
||||
&& !satisfies_constraint_K (operands[2])"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
(parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
|
||||
@ -20418,7 +20674,7 @@
|
||||
(mult:SI (match_operand:SI 1 "memory_operand" "")
|
||||
(match_operand:SI 2 "immediate_operand" ""))))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
|
||||
&& !satisfies_constraint_K (operands[2])"
|
||||
[(set (match_dup 3) (match_dup 1))
|
||||
(parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
|
||||
@ -20435,7 +20691,7 @@
|
||||
(match_operand:DI 2 "const_int_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:DI 3 "r")]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
|
||||
&& satisfies_constraint_K (operands[2])"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
|
||||
@ -20451,7 +20707,7 @@
|
||||
(match_operand:SI 2 "const_int_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:SI 3 "r")]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
|
||||
&& satisfies_constraint_K (operands[2])"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
|
||||
@ -20467,7 +20723,7 @@
|
||||
(match_operand:HI 2 "immediate_operand" "")))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
(match_scratch:HI 3 "r")]
|
||||
"(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
|
||||
"(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
|
||||
[(set (match_dup 3) (match_dup 2))
|
||||
(parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
|
||||
(clobber (reg:CC FLAGS_REG))])]
|
||||
@ -20646,7 +20902,7 @@
|
||||
|
||||
(define_insn "*sibcall_value_1_rex64_v"
|
||||
[(set (match_operand 0 "" "")
|
||||
(call (mem:QI (reg:DI 40))
|
||||
(call (mem:QI (reg:DI R11_REG))
|
||||
(match_operand:DI 1 "" "")))]
|
||||
"SIBLING_CALL_P (insn) && TARGET_64BIT"
|
||||
"jmp\t*%%r11"
|
||||
@ -20665,14 +20921,14 @@
|
||||
|
||||
(define_expand "sse_prologue_save"
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(unspec:BLK [(reg:DI 21)
|
||||
(reg:DI 22)
|
||||
(unspec:BLK [(reg:DI 22)
|
||||
(reg:DI 23)
|
||||
(reg:DI 24)
|
||||
(reg:DI 25)
|
||||
(reg:DI 26)
|
||||
(reg:DI 27)
|
||||
(reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(reg:DI 28)
|
||||
(reg:DI 29)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(use (match_operand:DI 1 "register_operand" ""))
|
||||
(use (match_operand:DI 2 "immediate_operand" ""))
|
||||
(use (label_ref:DI (match_operand 3 "" "")))])]
|
||||
@ -20682,14 +20938,14 @@
|
||||
(define_insn "*sse_prologue_save_insn"
|
||||
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
|
||||
(match_operand:DI 4 "const_int_operand" "n")))
|
||||
(unspec:BLK [(reg:DI 21)
|
||||
(reg:DI 22)
|
||||
(unspec:BLK [(reg:DI 22)
|
||||
(reg:DI 23)
|
||||
(reg:DI 24)
|
||||
(reg:DI 25)
|
||||
(reg:DI 26)
|
||||
(reg:DI 27)
|
||||
(reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(reg:DI 28)
|
||||
(reg:DI 29)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(use (match_operand:DI 1 "register_operand" "r"))
|
||||
(use (match_operand:DI 2 "const_int_operand" "i"))
|
||||
(use (label_ref:DI (match_operand 3 "" "X")))]
|
||||
|
@@ -1,6 +1,6 @@
; Options for the IA-32 and AMD64 ports of the compiler.

; Copyright (C) 2005 Free Software Foundation, Inc.
; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
;
; This file is part of GCC.
;
@@ -201,6 +201,22 @@ mssse3
Target Report Mask(SSSE3)
Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation

msse4a
Target Report Mask(SSE4A)
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation

mpopcnt
Target Report Mask(POPCNT)
Support code generation of popcount instruction for popcount built-ins
namely __builtin_popcount, __builtin_popcountl and __builtin_popcountll

mabm
Target Report Mask(ABM)
Support code generation of Advanced Bit Manipulation (ABM) instructions,
which include popcnt and lzcnt instructions, for popcount and clz built-ins
namely __builtin_popcount, __builtin_popcountl, __builtin_popcountll and
__builtin_clz, __builtin_clzl, __builtin_clzll

msseregparm
Target RejectNegative Mask(SSEREGPARM)
Use SSE register passing conventions for SF and DF mode

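The -mpopcnt and -mabm entries above only add the option masks; the work is done through the existing __builtin_popcount* and __builtin_clz* built-ins they name. A minimal sketch of a caller (not part of the patch; file name and flags are illustrative only):

/* Sketch only: with -mpopcnt or -mabm these built-ins can be emitted as
   single popcnt/lzcnt instructions instead of a generic bit-twiddling
   sequence.  */
#include <stdio.h>

int
main (void)
{
  unsigned long long x = 0x00f0f0f0f0f0f0f0ULL;

  printf ("popcount = %d\n", __builtin_popcountll (x));
  if (x != 0)   /* __builtin_clzll is undefined for a zero argument.  */
    printf ("leading zeros = %d\n", __builtin_clzll (x));
  return 0;
}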
@ -1396,14 +1396,14 @@
|
||||
(clobber (reg:XF 13))
|
||||
(clobber (reg:XF 14))
|
||||
(clobber (reg:XF 15))
|
||||
(clobber (reg:DI 29))
|
||||
(clobber (reg:DI 30))
|
||||
(clobber (reg:DI 31))
|
||||
(clobber (reg:DI 32))
|
||||
(clobber (reg:DI 33))
|
||||
(clobber (reg:DI 34))
|
||||
(clobber (reg:DI 35))
|
||||
(clobber (reg:DI 36))]
|
||||
(clobber (reg:DI 36))
|
||||
(clobber (reg:DI 37))]
|
||||
"TARGET_MMX"
|
||||
"emms"
|
||||
[(set_attr "type" "mmx")
|
||||
@ -1419,14 +1419,14 @@
|
||||
(clobber (reg:XF 13))
|
||||
(clobber (reg:XF 14))
|
||||
(clobber (reg:XF 15))
|
||||
(clobber (reg:DI 29))
|
||||
(clobber (reg:DI 30))
|
||||
(clobber (reg:DI 31))
|
||||
(clobber (reg:DI 32))
|
||||
(clobber (reg:DI 33))
|
||||
(clobber (reg:DI 34))
|
||||
(clobber (reg:DI 35))
|
||||
(clobber (reg:DI 36))]
|
||||
(clobber (reg:DI 36))
|
||||
(clobber (reg:DI 37))]
|
||||
"TARGET_3DNOW"
|
||||
"femms"
|
||||
[(set_attr "type" "mmx")
|
||||
|
@@ -1,4 +1,4 @@
/* Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

@@ -30,7 +30,11 @@
#ifndef _PMMINTRIN_H_INCLUDED
#define _PMMINTRIN_H_INCLUDED

#ifdef __SSE3__
#ifndef __SSE3__
# error "SSE3 instruction set not enabled"
#else

/* We need definitions from the SSE2 and SSE header files*/
#include <xmmintrin.h>
#include <emmintrin.h>

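With the guard above inverted, including <pmmintrin.h> without -msse3 now stops at the #error instead of silently declaring nothing. A minimal usage sketch (not part of the patch; assumes compilation with -msse3):

/* Sketch only: horizontal sum of a 4-float vector using the SSE3
   haddps instruction exposed by <pmmintrin.h>.  */
#include <pmmintrin.h>

float
sum4 (__m128 v)
{
  float r;
  v = _mm_hadd_ps (v, v);   /* {v1+v0, v3+v2, v1+v0, v3+v2} */
  v = _mm_hadd_ps (v, v);   /* element 0 now holds the full sum */
  _mm_store_ss (&r, v);
  return r;
}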
@ -1,5 +1,5 @@
|
||||
;; GCC machine description for SSE instructions
|
||||
;; Copyright (C) 2005, 2006
|
||||
;; Copyright (C) 2005, 2006, 2007
|
||||
;; Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
@ -261,7 +261,7 @@
|
||||
(define_insn "sse3_lddqu"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=x")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
|
||||
UNSPEC_LDQQU))]
|
||||
UNSPEC_LDDQU))]
|
||||
"TARGET_SSE3"
|
||||
"lddqu\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
@ -920,6 +920,7 @@
|
||||
"cvtsi2ss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse_cvtsi2ssq"
|
||||
@ -933,6 +934,7 @@
|
||||
"cvtsi2ssq\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse_cvtss2si"
|
||||
@ -946,6 +948,7 @@
|
||||
"cvtss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "sse_cvtss2siq"
|
||||
@ -959,6 +962,7 @@
|
||||
"cvtss2siq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "sse_cvttss2si"
|
||||
@ -971,6 +975,7 @@
|
||||
"cvttss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "sse_cvttss2siq"
|
||||
@ -983,6 +988,7 @@
|
||||
"cvttss2siq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "sse2_cvtdq2ps"
|
||||
@ -1852,7 +1858,8 @@
|
||||
"cvtsi2sd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")])
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")])
|
||||
|
||||
(define_insn "sse2_cvtsi2sdq"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
|
||||
@ -1865,7 +1872,8 @@
|
||||
"cvtsi2sdq\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DF")
|
||||
(set_attr "athlon_decode" "double,direct")])
|
||||
(set_attr "athlon_decode" "double,direct")
|
||||
(set_attr "amdfam10_decode" "vector,double")])
|
||||
|
||||
(define_insn "sse2_cvtsd2si"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r,r")
|
||||
@ -1878,6 +1886,7 @@
|
||||
"cvtsd2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
(define_insn "sse2_cvtsd2siq"
|
||||
@ -1891,6 +1900,7 @@
|
||||
"cvtsd2siq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "sse2_cvttsd2si"
|
||||
@ -1903,7 +1913,8 @@
|
||||
"cvttsd2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "SI")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
(define_insn "sse2_cvttsd2siq"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
@ -1915,7 +1926,8 @@
|
||||
"cvttsd2siq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sseicvt")
|
||||
(set_attr "mode" "DI")
|
||||
(set_attr "athlon_decode" "double,vector")])
|
||||
(set_attr "athlon_decode" "double,vector")
|
||||
(set_attr "amdfam10_decode" "double,double")])
|
||||
|
||||
(define_insn "sse2_cvtdq2pd"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
@ -1946,7 +1958,8 @@
|
||||
"TARGET_SSE2"
|
||||
"cvtpd2dq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "TI")])
|
||||
(set_attr "mode" "TI")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
(define_expand "sse2_cvttpd2dq"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "")
|
||||
@ -1964,7 +1977,8 @@
|
||||
"TARGET_SSE2"
|
||||
"cvttpd2dq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "TI")])
|
||||
(set_attr "mode" "TI")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
(define_insn "sse2_cvtsd2ss"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
|
||||
@ -1978,20 +1992,22 @@
|
||||
"cvtsd2ss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "athlon_decode" "vector,double")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse2_cvtss2sd"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
|
||||
(vec_merge:V2DF
|
||||
(float_extend:V2DF
|
||||
(vec_select:V2SF
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "x,m")
|
||||
(parallel [(const_int 0) (const_int 1)])))
|
||||
(match_operand:V2DF 1 "register_operand" "0")
|
||||
(match_operand:V2DF 1 "register_operand" "0,0")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE2"
|
||||
"cvtss2sd\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "amdfam10_decode" "vector,double")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
(define_expand "sse2_cvtpd2ps"
|
||||
@ -2012,7 +2028,8 @@
|
||||
"TARGET_SSE2"
|
||||
"cvtpd2ps\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V4SF")])
|
||||
(set_attr "mode" "V4SF")
|
||||
(set_attr "amdfam10_decode" "double")])
|
||||
|
||||
(define_insn "sse2_cvtps2pd"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=x")
|
||||
@ -2023,7 +2040,8 @@
|
||||
"TARGET_SSE2"
|
||||
"cvtps2pd\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "mode" "V2DF")])
|
||||
(set_attr "mode" "V2DF")
|
||||
(set_attr "amdfam10_decode" "direct")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
@ -4524,3 +4542,92 @@
|
||||
"pabs<mmxvecsize>\t{%1, %0|%0, %1}";
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; AMD SSE4A instructions
|
||||
;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_insn "sse4a_vmmovntv2df"
|
||||
[(set (match_operand:DF 0 "memory_operand" "=m")
|
||||
(unspec:DF [(vec_select:DF
|
||||
(match_operand:V2DF 1 "register_operand" "x")
|
||||
(parallel [(const_int 0)]))]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE4A"
|
||||
"movntsd\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
(define_insn "sse4a_movntdf"
|
||||
[(set (match_operand:DF 0 "memory_operand" "=m")
|
||||
(unspec:DF [(match_operand:DF 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE4A"
|
||||
"movntsd\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "mode" "DF")])
|
||||
|
||||
(define_insn "sse4a_vmmovntv4sf"
|
||||
[(set (match_operand:SF 0 "memory_operand" "=m")
|
||||
(unspec:SF [(vec_select:SF
|
||||
(match_operand:V4SF 1 "register_operand" "x")
|
||||
(parallel [(const_int 0)]))]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE4A"
|
||||
"movntss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse4a_movntsf"
|
||||
[(set (match_operand:SF 0 "memory_operand" "=m")
|
||||
(unspec:SF [(match_operand:SF 1 "register_operand" "x")]
|
||||
UNSPEC_MOVNT))]
|
||||
"TARGET_SSE4A"
|
||||
"movntss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "mode" "SF")])
|
||||
|
||||
(define_insn "sse4a_extrqi"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand 2 "const_int_operand" "")
|
||||
(match_operand 3 "const_int_operand" "")]
|
||||
UNSPEC_EXTRQI))]
|
||||
"TARGET_SSE4A"
|
||||
"extrq\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse4a_extrq"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V16QI 2 "register_operand" "x")]
|
||||
UNSPEC_EXTRQ))]
|
||||
"TARGET_SSE4A"
|
||||
"extrq\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse4a_insertqi"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V2DI 2 "register_operand" "x")
|
||||
(match_operand 3 "const_int_operand" "")
|
||||
(match_operand 4 "const_int_operand" "")]
|
||||
UNSPEC_INSERTQI))]
|
||||
"TARGET_SSE4A"
|
||||
"insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
|
||||
[(set_attr "type" "sseins")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse4a_insertq"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V2DI 2 "register_operand" "x")]
|
||||
UNSPEC_INSERTQ))]
|
||||
"TARGET_SSE4A"
|
||||
"insertq\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sseins")
|
||||
(set_attr "mode" "TI")])
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
@ -30,231 +30,11 @@
|
||||
#ifndef _TMMINTRIN_H_INCLUDED
|
||||
#define _TMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
#define _mm_alignr_epi8(__X, __Y, __N) \
|
||||
((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8))
|
||||
|
||||
#define _mm_alignr_pi8(__X, __Y, __N) \
|
||||
((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi8 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi8 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi16 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi32 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
|
||||
}
|
||||
|
||||
#endif /* __SSSE3__ */
|
||||
|
||||
#endif /* _TMMINTRIN_H_INCLUDED */
|
||||
/* Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.1. */
|
||||
|
||||
#ifndef _TMMINTRIN_H_INCLUDED
|
||||
#define _TMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#ifndef __SSSE3__
|
||||
# error "SSSE3 instruction set not enabled"
|
||||
#else
|
||||
|
||||
/* We need definitions from the SSE3, SSE2 and SSE header files*/
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
|
@@ -7255,6 +7255,23 @@ v4si __builtin_ia32_pabsd128 (v4si)
v8hi __builtin_ia32_pabsw128 (v8hi)
@end smallexample

The following built-in functions are available when @option{-msse4a} is used.

@smallexample
void _mm_stream_sd (double*,__m128d);
Generates the @code{movntsd} machine instruction.
void _mm_stream_ss (float*,__m128);
Generates the @code{movntss} machine instruction.
__m128i _mm_extract_si64 (__m128i, __m128i);
Generates the @code{extrq} machine instruction with only SSE register operands.
__m128i _mm_extracti_si64 (__m128i, int, int);
Generates the @code{extrq} machine instruction with SSE register and immediate operands.
__m128i _mm_insert_si64 (__m128i, __m128i);
Generates the @code{insertq} machine instruction with only SSE register operands.
__m128i _mm_inserti_si64 (__m128i, __m128i, int, int);
Generates the @code{insertq} machine instruction with SSE register and immediate operands.
@end smallexample

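A minimal sketch of how the SSE4A built-ins documented above might be used (not part of the patch; it assumes the GCC 4.3 <ammintrin.h> intrinsics header is available and that the file is compiled with -msse4a):

/* Sketch only: the header name and the immediates are illustrative.  */
#include <ammintrin.h>

void
stream_low_double (double *dst, __m128d v)
{
  /* movntsd: non-temporal store of the low double, bypassing the cache.  */
  _mm_stream_sd (dst, v);
}

__m128i
low_byte (__m128i v)
{
  /* extrq with immediate operands: extract 8 bits starting at bit 0.  */
  return _mm_extracti_si64 (v, 8, 0);
}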
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.

@@ -513,7 +513,7 @@ in the following sections.
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
-mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
@@ -9059,6 +9059,10 @@ instruction set support.
@item k8, opteron, athlon64, athlon-fx
AMD K8 core based CPUs with x86-64 instruction set support. (This supersets
MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
@item amdfam10, barcelona
AMD Family 10h core based CPUs with x86-64 instruction set support. (This
supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit
instruction set extensions.)
@item k8-sse3, opteron-sse3, athlon64-sse3
Improved versions of k8, opteron and athlon64 with SSE3 instruction set support.
@item winchip-c6
@@ -9355,8 +9359,14 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-sse3
@item -mssse3
@itemx -mno-ssse3
@item -msse4a
@item -mno-sse4a
@item -m3dnow
@itemx -mno-3dnow
@item -mpopcnt
@itemx -mno-popcnt
@item -mabm
@itemx -mno-abm
@opindex mmmx
@opindex mno-mmx
@opindex msse
@@ -9364,7 +9374,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow
@opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX,
SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets.
These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and
disabled by these switches.

@@ -2802,9 +2802,13 @@ operand_equal_p (tree arg0, tree arg1, unsigned int flags)

case ARRAY_REF:
case ARRAY_RANGE_REF:
/* Operands 2 and 3 may be null. */
/* Operands 2 and 3 may be null.
Compare the array index by value if it is constant first as we
may have different types but same value here. */
return (OP_SAME (0)
&& OP_SAME (1)
&& (tree_int_cst_equal (TREE_OPERAND (arg0, 1),
TREE_OPERAND (arg1, 1))
|| OP_SAME (1))
&& OP_SAME_WITH_NULL (2)
&& OP_SAME_WITH_NULL (3));

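A hypothetical source-level illustration of the case the new comment describes, where two ARRAY_REF index constants are equal in value but carry different integer types (names and types chosen only for this example):

/* Illustration only: both accesses name the same element; after folding,
   the index constants may have different integer types, which
   operand_equal_p now treats as equal by comparing their values.  */
int a[4];

int
same_element (void)
{
  return a[1] == a[(long) 1];
}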
@@ -1600,9 +1600,7 @@ canonicalize_addr_expr (tree *expr_p)
/* All checks succeeded. Build a new node to merge the cast. */
*expr_p = build4 (ARRAY_REF, dctype, obj_expr,
TYPE_MIN_VALUE (TYPE_DOMAIN (datype)),
TYPE_MIN_VALUE (TYPE_DOMAIN (datype)),
size_binop (EXACT_DIV_EXPR, TYPE_SIZE_UNIT (dctype),
size_int (TYPE_ALIGN_UNIT (dctype))));
NULL_TREE, NULL_TREE);
*expr_p = build1 (ADDR_EXPR, ctype, *expr_p);
}

@@ -1621,9 +1621,7 @@ maybe_fold_offset_to_array_ref (tree base, tree offset, tree orig_type)
if (!integer_zerop (elt_offset))
idx = int_const_binop (PLUS_EXPR, idx, elt_offset, 0);

return build4 (ARRAY_REF, orig_type, base, idx, min_idx,
size_int (tree_low_cst (elt_size, 1)
/ (TYPE_ALIGN_UNIT (elt_type))));
return build4 (ARRAY_REF, orig_type, base, idx, NULL_TREE, NULL_TREE);
}

@@ -1076,6 +1076,7 @@ phi_translate (tree expr, value_set_t set, basic_block pred,
tree newexpr;
tree vh = get_value_handle (expr);
bool listchanged = false;
bool invariantarg = false;
VEC (tree, gc) *vuses = VALUE_HANDLE_VUSES (vh);
VEC (tree, gc) *tvuses;

@@ -1134,10 +1135,26 @@ phi_translate (tree expr, value_set_t set, basic_block pred,
if (newval != oldval)
{
listchanged = true;
invariantarg |= is_gimple_min_invariant (newval);
TREE_VALUE (newwalker) = get_value_handle (newval);
}
}
}

/* In case of new invariant args we might try to fold the call
again. */
if (invariantarg)
{
tree tmp = fold_ternary (CALL_EXPR, TREE_TYPE (expr),
newop0, newarglist, newop2);
if (tmp)
{
STRIP_TYPE_NOPS (tmp);
if (is_gimple_min_invariant (tmp))
return tmp;
}
}

if (listchanged)
vn_lookup_or_add (newarglist, NULL);
