Backport SSSE3 instruction set support to base gcc.

SSSE3 is enabled by default for -march=core2.

Obtained from:	gcc 4.3 (rev. 117958, 121687, 121726, 123639; GPLv2)
MFC after:	2 weeks
This commit is contained in:
Martin Matuska 2011-03-14 13:31:34 +00:00
parent 28e5e0bf4c
commit c031e7949e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=219639
11 changed files with 1312 additions and 20 deletions

View File

@ -268,11 +268,13 @@ xscale-*-*)
;;
i[34567]86-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h"
;;
x86_64-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)

View File

@ -1511,16 +1511,24 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
case OPT_msse:
if (!value)
{
target_flags &= ~(MASK_SSE2 | MASK_SSE3);
target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
}
return true;
case OPT_msse2:
if (!value)
{
target_flags &= ~MASK_SSE3;
target_flags_explicit |= MASK_SSE3;
target_flags &= ~(MASK_SSE3 | MASK_SSSE3);
target_flags_explicit |= MASK_SSE3 | MASK_SSSE3;
}
return true;
case OPT_msse3:
if (!value)
{
target_flags &= ~MASK_SSSE3;
target_flags_explicit |= MASK_SSSE3;
}
return true;
@ -1589,7 +1597,8 @@ override_options (void)
PTA_PREFETCH_SSE = 16,
PTA_3DNOW = 32,
PTA_3DNOW_A = 64,
PTA_64BIT = 128
PTA_64BIT = 128,
PTA_SSSE3 = 256
} flags;
}
const processor_alias_table[] =
@ -1617,7 +1626,7 @@ override_options (void)
| PTA_MMX | PTA_PREFETCH_SSE},
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
| PTA_MMX | PTA_PREFETCH_SSE},
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
| PTA_64BIT | PTA_MMX
| PTA_PREFETCH_SSE},
{"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
@ -1811,6 +1820,9 @@ override_options (void)
if (processor_alias_table[i].flags & PTA_SSE3
&& !(target_flags_explicit & MASK_SSE3))
target_flags |= MASK_SSE3;
if (processor_alias_table[i].flags & PTA_SSSE3
&& !(target_flags_explicit & MASK_SSSE3))
target_flags |= MASK_SSSE3;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
@ -1987,6 +1999,10 @@ override_options (void)
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
/* Turn on SSE3 builtins for -mssse3. */
if (TARGET_SSSE3)
target_flags |= MASK_SSE3;
/* Turn on SSE2 builtins for -msse3. */
if (TARGET_SSE3)
target_flags |= MASK_SSE2;
@ -14693,6 +14709,41 @@ enum ix86_builtins
IX86_BUILTIN_MONITOR,
IX86_BUILTIN_MWAIT,
/* SSSE3. */
IX86_BUILTIN_PHADDW,
IX86_BUILTIN_PHADDD,
IX86_BUILTIN_PHADDSW,
IX86_BUILTIN_PHSUBW,
IX86_BUILTIN_PHSUBD,
IX86_BUILTIN_PHSUBSW,
IX86_BUILTIN_PMADDUBSW,
IX86_BUILTIN_PMULHRSW,
IX86_BUILTIN_PSHUFB,
IX86_BUILTIN_PSIGNB,
IX86_BUILTIN_PSIGNW,
IX86_BUILTIN_PSIGND,
IX86_BUILTIN_PALIGNR,
IX86_BUILTIN_PABSB,
IX86_BUILTIN_PABSW,
IX86_BUILTIN_PABSD,
IX86_BUILTIN_PHADDW128,
IX86_BUILTIN_PHADDD128,
IX86_BUILTIN_PHADDSW128,
IX86_BUILTIN_PHSUBW128,
IX86_BUILTIN_PHSUBD128,
IX86_BUILTIN_PHSUBSW128,
IX86_BUILTIN_PMADDUBSW128,
IX86_BUILTIN_PMULHRSW128,
IX86_BUILTIN_PSHUFB128,
IX86_BUILTIN_PSIGNB128,
IX86_BUILTIN_PSIGNW128,
IX86_BUILTIN_PSIGND128,
IX86_BUILTIN_PALIGNR128,
IX86_BUILTIN_PABSB128,
IX86_BUILTIN_PABSW128,
IX86_BUILTIN_PABSD128,
IX86_BUILTIN_VEC_INIT_V2SI,
IX86_BUILTIN_VEC_INIT_V4HI,
IX86_BUILTIN_VEC_INIT_V8QI,
@ -15034,7 +15085,33 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
{ MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
/* SSSE3 */
{ MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
@ -15081,6 +15158,14 @@ static const struct builtin_description bdesc_1arg[] =
/* SSE3 */
{ MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
{ MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
/* SSSE3 */
{ MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
{ MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
{ MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
{ MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
};
static void
@ -15215,6 +15300,16 @@ ix86_init_mmx_sse_builtins (void)
/* Normal vector unops. */
tree v4sf_ftype_v4sf
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
tree v16qi_ftype_v16qi
= build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
tree v8hi_ftype_v8hi
= build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
tree v4si_ftype_v4si
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
tree v8qi_ftype_v8qi
= build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
tree v4hi_ftype_v4hi
= build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
/* Normal vector binops. */
tree v4sf_ftype_v4sf_v4sf
@ -15234,6 +15329,12 @@ ix86_init_mmx_sse_builtins (void)
long_long_unsigned_type_node,
long_long_unsigned_type_node, NULL_TREE);
tree di_ftype_di_di_int
= build_function_type_list (long_long_unsigned_type_node,
long_long_unsigned_type_node,
long_long_unsigned_type_node,
integer_type_node, NULL_TREE);
tree v2si_ftype_v2sf
= build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
tree v2sf_ftype_v2si
@ -15335,6 +15436,9 @@ ix86_init_mmx_sse_builtins (void)
tree v2di_ftype_v2di_int
= build_function_type_list (V2DI_type_node,
V2DI_type_node, integer_type_node, NULL_TREE);
tree v2di_ftype_v2di_v2di_int
= build_function_type_list (V2DI_type_node, V2DI_type_node,
V2DI_type_node, integer_type_node, NULL_TREE);
tree v4si_ftype_v4si_int
= build_function_type_list (V4SI_type_node,
V4SI_type_node, integer_type_node, NULL_TREE);
@ -15451,6 +15555,50 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (d->mask, d->name, type, d->code);
}
/* Add all builtins that are more or less simple operations on 1 operand.
   Table entries without a name are skipped.  For every other entry the
   function type is chosen from the machine mode of operand 1 of the
   entry's insn pattern, i.e. a one-argument function whose argument and
   result share that vector type. */
for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
{
enum machine_mode mode;
tree type;
/* Nameless entries are not registered by this loop. */
if (d->name == 0)
continue;
/* Operand 1 is the input of the pattern; its mode selects the type. */
mode = insn_data[d->icode].operand[1].mode;
switch (mode)
{
case V16QImode:
type = v16qi_ftype_v16qi;
break;
case V8HImode:
type = v8hi_ftype_v8hi;
break;
case V4SImode:
type = v4si_ftype_v4si;
break;
case V2DFmode:
type = v2df_ftype_v2df;
break;
case V4SFmode:
type = v4sf_ftype_v4sf;
break;
case V8QImode:
type = v8qi_ftype_v8qi;
break;
case V4HImode:
type = v4hi_ftype_v4hi;
break;
case V2SImode:
type = v2si_ftype_v2si;
break;
default:
/* A named entry with any other operand mode is not handled here. */
abort ();
}
def_builtin (d->mask, d->name, type, d->code);
}
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
@ -15650,6 +15798,12 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
/* SSSE3. */
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
IX86_BUILTIN_PALIGNR);
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
@ -16148,7 +16302,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
tree arglist = TREE_OPERAND (exp, 1);
tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
enum machine_mode tmode, mode0, mode1, mode2, mode3;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
switch (fcode)
@ -16618,6 +16772,52 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
target, 1);
/* Expand the two palignr builtins.  Both take two vector arguments and a
   shift count; they differ only in the insn pattern used and in the mode
   of the value the builtin produces. */
case IX86_BUILTIN_PALIGNR:
case IX86_BUILTIN_PALIGNR128:
if (fcode == IX86_BUILTIN_PALIGNR)
{
icode = CODE_FOR_ssse3_palignrdi;
mode = DImode;
}
else
{
icode = CODE_FOR_ssse3_palignrti;
mode = V2DImode;
}
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
/* Modes the chosen pattern expects for its output and three inputs. */
tmode = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
mode3 = insn_data[icode].operand[3].mode;
/* An input rejected by the pattern's predicate is copied into a fresh
   register and then viewed (via a subreg) in the mode the pattern wants. */
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
{
op0 = copy_to_reg (op0);
op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
}
if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
{
op1 = copy_to_reg (op1);
op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
}
/* The shift count cannot be fixed up: if the pattern's operand 3
   predicate rejects it, report an error and return a dummy value. */
if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
{
error ("shift must be an immediate");
return const0_rtx;
}
/* The result always goes into a new pseudo of the builtin's mode; the
   pattern writes it through a subreg in the pattern's own output mode. */
target = gen_reg_rtx (mode);
pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
op0, op1, op2);
if (! pat)
return 0;
emit_insn (pat);
return target;
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:

View File

@ -418,6 +418,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
builtin_define ("__SSE2__"); \
if (TARGET_SSE3) \
builtin_define ("__SSE3__"); \
if (TARGET_SSSE3) \
builtin_define ("__SSSE3__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \

View File

@ -148,6 +148,11 @@
(UNSPEC_SP_TEST 101)
(UNSPEC_SP_TLS_SET 102)
(UNSPEC_SP_TLS_TEST 103)
; SSSE3
(UNSPEC_PSHUFB 120)
(UNSPEC_PSIGN 121)
(UNSPEC_PALIGNR 122)
])
(define_constants
@ -20948,6 +20953,6 @@
}
[(set_attr "type" "multi")])
(include "sse.md")
(include "mmx.md")
(include "sse.md")
(include "sync.md")

View File

@ -197,6 +197,10 @@ msse3
Target Report Mask(SSE3)
Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
mssse3
Target Report Mask(SSSE3)
Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
msseregparm
Target RejectNegative Mask(SSEREGPARM)
Use SSE register passing conventions for SF and DF mode

View File

@ -3949,3 +3949,578 @@
;; zero extended to 64bit, we only need to set up 32bit registers.
"monitor"
[(set_attr "length" "3")])
;; SSSE3
;; phaddw, 128-bit form: horizontal add of 16-bit elements.
;; The first four result elements are the sums of the adjacent element
;; pairs (0+1, 2+3, 4+5, 6+7) of operand 1, which is tied to the
;; destination register; the last four are the same pair sums taken from
;; operand 2, which may be a register or memory.
(define_insn "ssse3_phaddwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(vec_concat:V4HI
(vec_concat:V2HI
(plus:HI
(vec_select:HI
(match_operand:V8HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
(plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
(vec_concat:V4HI
(vec_concat:V2HI
(plus:HI
(vec_select:HI
(match_operand:V8HI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
(plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phaddw, 64-bit form ("y" register constraint): horizontal add of 16-bit
;; elements.  The first two result elements are the pair sums (0+1, 2+3)
;; of operand 1, which is tied to the destination; the last two are the
;; pair sums of operand 2 (register or memory).
(define_insn "ssse3_phaddwv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(vec_concat:V4HI
(vec_concat:V2HI
(plus:HI
(vec_select:HI
(match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(plus:HI
(vec_select:HI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; phaddd, 128-bit form: horizontal add of 32-bit elements.
;; The first two result elements are the pair sums (0+1, 2+3) of operand 1,
;; which is tied to the destination; the last two are the pair sums of
;; operand 2 (register or memory).
(define_insn "ssse3_phadddv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_concat:V4SI
(vec_concat:V2SI
(plus:SI
(vec_select:SI
(match_operand:V4SI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(plus:SI
(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SI
(plus:SI
(vec_select:SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
(plus:SI
(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phaddd, 64-bit form ("y" register constraint): the first result element
;; is the sum of the two 32-bit elements of operand 1 (tied to the
;; destination), the second is the sum of the two elements of operand 2
;; (register or memory).
(define_insn "ssse3_phadddv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_concat:V2SI
(plus:SI
(vec_select:SI
(match_operand:V2SI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(plus:SI
(vec_select:SI
(match_operand:V2SI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; phaddsw, 128-bit form: horizontal add of 16-bit elements using signed
;; saturating addition (ss_plus).  The first four result elements are the
;; pair sums (0+1, 2+3, 4+5, 6+7) of operand 1, which is tied to the
;; destination; the last four are the pair sums of operand 2 (register or
;; memory).
(define_insn "ssse3_phaddswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(vec_concat:V4HI
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI
(match_operand:V8HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ss_plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
(ss_plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
(vec_concat:V4HI
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI
(match_operand:V8HI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ss_plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
(ss_plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phaddsw, 64-bit form ("y" register constraint): horizontal add of 16-bit
;; elements using signed saturating addition (ss_plus).  The first two
;; result elements are the pair sums (0+1, 2+3) of operand 1, which is
;; tied to the destination; the last two are the pair sums of operand 2
;; (register or memory).
(define_insn "ssse3_phaddswv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(vec_concat:V4HI
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI
(match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ss_plus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_plus:HI
(vec_select:HI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ss_plus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; phsubw, 128-bit form: horizontal subtract of 16-bit elements.
;; Each result element is an even-indexed element minus the odd-indexed
;; element that follows it (0-1, 2-3, 4-5, 6-7).  The first four results
;; come from operand 1, which is tied to the destination; the last four
;; come from operand 2 (register or memory).
(define_insn "ssse3_phsubwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(vec_concat:V4HI
(vec_concat:V2HI
(minus:HI
(vec_select:HI
(match_operand:V8HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
(minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
(vec_concat:V4HI
(vec_concat:V2HI
(minus:HI
(vec_select:HI
(match_operand:V8HI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
(minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phsubw, 64-bit form ("y" register constraint): horizontal subtract of
;; 16-bit elements.  Each result element is an even-indexed element minus
;; the odd-indexed element that follows it (0-1, 2-3).  The first two
;; results come from operand 1, which is tied to the destination; the last
;; two come from operand 2 (register or memory).
(define_insn "ssse3_phsubwv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(vec_concat:V4HI
(vec_concat:V2HI
(minus:HI
(vec_select:HI
(match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(minus:HI
(vec_select:HI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; phsubd, 128-bit form: horizontal subtract of 32-bit elements.
;; Each result element is an even-indexed element minus the odd-indexed
;; element that follows it (0-1, 2-3).  The first two results come from
;; operand 1, which is tied to the destination; the last two come from
;; operand 2 (register or memory).
(define_insn "ssse3_phsubdv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_concat:V4SI
(vec_concat:V2SI
(minus:SI
(vec_select:SI
(match_operand:V4SI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(minus:SI
(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SI
(minus:SI
(vec_select:SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
(minus:SI
(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phsubd, 64-bit form ("y" register constraint): the first result element
;; is element 0 minus element 1 of operand 1 (tied to the destination),
;; the second is element 0 minus element 1 of operand 2 (register or
;; memory).
(define_insn "ssse3_phsubdv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_concat:V2SI
(minus:SI
(vec_select:SI
(match_operand:V2SI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
(minus:SI
(vec_select:SI
(match_operand:V2SI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; phsubsw, 128-bit form: horizontal subtract of 16-bit elements using
;; signed saturating subtraction (ss_minus).  Each result element is an
;; even-indexed element minus the odd-indexed element that follows it
;; (0-1, 2-3, 4-5, 6-7).  The first four results come from operand 1,
;; which is tied to the destination; the last four come from operand 2
;; (register or memory).
(define_insn "ssse3_phsubswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(vec_concat:V4HI
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI
(match_operand:V8HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ss_minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
(ss_minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
(vec_concat:V4HI
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI
(match_operand:V8HI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ss_minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
(ss_minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
;; phsubsw, 64-bit form ("y" register constraint): horizontal subtract of
;; 16-bit elements using signed saturating subtraction (ss_minus).  Each
;; result element is an even-indexed element minus the odd-indexed element
;; that follows it (0-1, 2-3).  The first two results come from operand 1,
;; which is tied to the destination; the last two come from operand 2
;; (register or memory).
(define_insn "ssse3_phsubswv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(vec_concat:V4HI
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI
(match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ss_minus:HI
(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2HI
(ss_minus:HI
(vec_select:HI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ss_minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
(set_attr "mode" "DI")])
;; pmaddubsw, 128-bit form.  The even-indexed bytes of operand 1 are
;; zero-extended and multiplied by the sign-extended even-indexed bytes of
;; operand 2; the same is done for the odd-indexed bytes; the two 16-bit
;; product vectors are then added with signed saturation.
;;
;; Every vec_select picks 8 of the 16 input bytes, so its mode is V8QI.
;; Operand 1 is zero-extended while operand 2 is sign-extended, so the two
;; inputs are not interchangeable: operand 1 must not carry the commutative
;; "%" modifier.  It is tied to the destination and therefore a register,
;; like operand 1 of the phadd/phsub patterns in this file.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(ss_plus:V8HI
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 1 "register_operand" "0")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)
			   (const_int 8)
			   (const_int 10)
			   (const_int 12)
			   (const_int 14)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)
			   (const_int 8)
			   (const_int 10)
			   (const_int 12)
			   (const_int 14)]))))
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI (match_dup 1)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)
			   (const_int 9)
			   (const_int 11)
			   (const_int 13)
			   (const_int 15)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI (match_dup 2)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)
			   (const_int 9)
			   (const_int 11)
			   (const_int 13)
			   (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit form ("y" register constraint).  The even-indexed
;; bytes of operand 1 are zero-extended and multiplied by the sign-extended
;; even-indexed bytes of operand 2; the same is done for the odd-indexed
;; bytes; the two 16-bit product vectors are then added with signed
;; saturation.
;;
;; Every vec_select picks 4 of the 8 input bytes, so its mode is V4QI.
;; Operand 1 is zero-extended while operand 2 is sign-extended, so the two
;; inputs are not interchangeable: operand 1 must not carry the commutative
;; "%" modifier.  It is tied to the destination and therefore a register,
;; like operand 1 of the phadd/phsub patterns in this file.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ss_plus:V4HI
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 1 "register_operand" "0")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)]))))
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI (match_dup 1)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI (match_dup 2)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
;; pmulhrsw, 128-bit form.  Both 16-bit inputs are sign-extended to 32 bits
;; and multiplied; the product is shifted right by 14, one is added, the
;; sum is shifted right by one more bit and truncated back to 16 bits.
;; Both inputs are treated alike, so operand 1 is marked commutative ("%");
;; the insn condition additionally calls ix86_binary_operator_ok.
;; NOTE(review): the rounding constant is written as const_vector:V8HI
;; although it is an operand of a V8SI plus -- confirm the intended mode.
(define_insn "ssse3_pmulhrswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
(lshiftrt:V8SI
(plus:V8SI
(lshiftrt:V8SI
(mult:V8SI
(sign_extend:V8SI
(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
(sign_extend:V8SI
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
(const_int 14))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
"TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmulhrsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
(set_attr "mode" "TI")])
;; pmulhrsw, 64-bit form ("y" register constraint).  Both 16-bit inputs are
;; sign-extended to 32 bits and multiplied; the product is shifted right by
;; 14, one is added, the sum is shifted right by one more bit and truncated
;; back to 16 bits.  Both inputs are treated alike, so operand 1 is marked
;; commutative ("%"); the insn condition additionally calls
;; ix86_binary_operator_ok.
;; NOTE(review): the rounding constant is written as const_vector:V4HI
;; although it is an operand of a V4SI plus -- confirm the intended mode.
(define_insn "ssse3_pmulhrswv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
(match_operand:V4HI 1 "nonimmediate_operand" "%0"))
(sign_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 14))
(const_vector:V4HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
"TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
"pmulhrsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
(set_attr "mode" "DI")])
;; pshufb, 128-bit form.  The operation is not described in RTL; it is
;; modelled as UNSPEC_PSHUFB of operand 1 (tied to the destination) and
;; operand 2 (register or memory), and emitted as a pshufb instruction.
(define_insn "ssse3_pshufbv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
(set_attr "mode" "TI")])
;; pshufb, 64-bit form ("y" register constraint).  The operation is not
;; described in RTL; it is modelled as UNSPEC_PSHUFB of operand 1 (tied to
;; the destination) and operand 2 (register or memory), and emitted as a
;; pshufb instruction.
(define_insn "ssse3_pshufbv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
(set_attr "mode" "DI")])
;; psignb/psignw/psignd, "x"-register form: one pattern per mode of the
;; SSEMODE124 iterator, with <ssevecsize> supplying the mnemonic suffix.
;; The operation is modelled as UNSPEC_PSIGN of operand 1 (tied to the
;; destination) and operand 2 (register or memory).
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
UNSPEC_PSIGN))]
"TARGET_SSSE3"
"psign<ssevecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
(set_attr "mode" "TI")])
;; psignb/psignw/psignd, "y"-register form: one pattern per mode of the
;; MMXMODEI iterator, with <mmxvecsize> supplying the mnemonic suffix.
;; The operation is modelled as UNSPEC_PSIGN of operand 1 (tied to the
;; destination) and operand 2 (register or memory).
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
(unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
(match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
UNSPEC_PSIGN))]
"TARGET_SSSE3"
"psign<mmxvecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
(set_attr "mode" "DI")])
;; palignr, 128-bit form, operating on TImode values.  Modelled as
;; UNSPEC_PALIGNR of operand 1 (tied to the destination), operand 2
;; (register or memory) and the constant operand 3.
;; Operand 3 is divided by 8 before it is printed as the instruction's
;; immediate; judging by the predicate name it is a bit count that is a
;; multiple of 8 in the range 0..255 -- confirm against the predicate's
;; definition.
(define_insn "ssse3_palignrti"
[(set (match_operand:TI 0 "register_operand" "=x")
(unspec:TI [(match_operand:TI 1 "register_operand" "0")
(match_operand:TI 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
(set_attr "mode" "TI")])
;; palignr, 64-bit form ("y" register constraint), operating on DImode
;; values.  Modelled as UNSPEC_PALIGNR of operand 1 (tied to the
;; destination), operand 2 (register or memory) and the constant operand 3.
;; Operand 3 is divided by 8 before it is printed as the instruction's
;; immediate; judging by the predicate name it is a bit count that is a
;; multiple of 8 in the range 0..255 -- confirm against the predicate's
;; definition.
(define_insn "ssse3_palignrdi"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI [(match_operand:DI 1 "register_operand" "0")
(match_operand:DI 2 "nonimmediate_operand" "ym")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
(set_attr "mode" "DI")])
;; pabsb/pabsw/pabsd, "x"-register form: element-wise absolute value (abs)
;; of operand 1, which may be a register or memory.  One pattern is
;; generated per mode of the SSEMODE124 iterator, with <ssevecsize>
;; supplying the mnemonic suffix.
(define_insn "abs<mode>2"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
"TARGET_SSSE3"
"pabs<ssevecsize>\t{%1, %0|%0, %1}";
[(set_attr "type" "sselog1")
(set_attr "mode" "TI")])
;; pabsb/pabsw/pabsd, "y"-register form: element-wise absolute value (abs)
;; of operand 1, which may be a register or memory.  One pattern is
;; generated per mode of the MMXMODEI iterator, with <mmxvecsize>
;; supplying the mnemonic suffix.
(define_insn "abs<mode>2"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
(abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
"TARGET_SSSE3"
"pabs<mmxvecsize>\t{%1, %0|%0, %1}";
[(set_attr "type" "sselog1")
(set_attr "mode" "DI")])

View File

@ -0,0 +1,448 @@
/* Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* As a special exception, if you include this header file into source
files compiled by GCC, this header file does not by itself cause
the resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General
Public License. */
/* Implemented from the specification included in the Intel C++ Compiler
User Guide and Reference, version 9.1. */
#ifndef _TMMINTRIN_H_INCLUDED
#define _TMMINTRIN_H_INCLUDED
#ifdef __SSSE3__
#include <pmmintrin.h>
/* PHADDW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_phaddw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_phaddw128 (__lhs, __rhs);
}
/* PHADDD, SSE-register form: both arguments are reinterpreted as __v4si
   for __builtin_ia32_phaddd128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __lhs = (__v4si) __X;
  __v4si __rhs = (__v4si) __Y;

  return (__m128i) __builtin_ia32_phaddd128 (__lhs, __rhs);
}
/* PHADDSW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_phaddsw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_phaddsw128 (__lhs, __rhs);
}
/* PHADDW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_phaddw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_phaddw (__lhs, __rhs);
}
/* PHADDD, MMX-register form: both arguments are reinterpreted as __v2si
   for __builtin_ia32_phaddd and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
{
  __v2si __lhs = (__v2si) __X;
  __v2si __rhs = (__v2si) __Y;

  return (__m64) __builtin_ia32_phaddd (__lhs, __rhs);
}
/* PHADDSW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_phaddsw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_phaddsw (__lhs, __rhs);
}
/* PHSUBW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_phsubw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_phsubw128 (__lhs, __rhs);
}
/* PHSUBD, SSE-register form: both arguments are reinterpreted as __v4si
   for __builtin_ia32_phsubd128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __lhs = (__v4si) __X;
  __v4si __rhs = (__v4si) __Y;

  return (__m128i) __builtin_ia32_phsubd128 (__lhs, __rhs);
}
/* PHSUBSW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_phsubsw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_phsubsw128 (__lhs, __rhs);
}
/* PHSUBW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_phsubw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_phsubw (__lhs, __rhs);
}
/* PHSUBD, MMX-register form: both arguments are reinterpreted as __v2si
   for __builtin_ia32_phsubd and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
{
  __v2si __lhs = (__v2si) __X;
  __v2si __rhs = (__v2si) __Y;

  return (__m64) __builtin_ia32_phsubd (__lhs, __rhs);
}
/* PHSUBSW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_phsubsw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_phsubsw (__lhs, __rhs);
}
/* PMADDUBSW, SSE-register form: both arguments are reinterpreted as
   __v16qi for __builtin_ia32_pmaddubsw128 and the result is returned as
   __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
{
  __v16qi __lhs = (__v16qi) __X;
  __v16qi __rhs = (__v16qi) __Y;

  return (__m128i) __builtin_ia32_pmaddubsw128 (__lhs, __rhs);
}
/* PMADDUBSW, MMX-register form: both arguments are reinterpreted as
   __v8qi for __builtin_ia32_pmaddubsw and the result is returned as
   __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
{
  __v8qi __lhs = (__v8qi) __X;
  __v8qi __rhs = (__v8qi) __Y;

  return (__m64) __builtin_ia32_pmaddubsw (__lhs, __rhs);
}
/* PMULHRSW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_pmulhrsw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_pmulhrsw128 (__lhs, __rhs);
}
/* PMULHRSW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_pmulhrsw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_pmulhrsw (__lhs, __rhs);
}
/* PSHUFB, SSE-register form: both arguments are reinterpreted as __v16qi
   for __builtin_ia32_pshufb128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
{
  __v16qi __lhs = (__v16qi) __X;
  __v16qi __rhs = (__v16qi) __Y;

  return (__m128i) __builtin_ia32_pshufb128 (__lhs, __rhs);
}
/* PSHUFB, MMX-register form: both arguments are reinterpreted as __v8qi
   for __builtin_ia32_pshufb and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
{
  __v8qi __lhs = (__v8qi) __X;
  __v8qi __rhs = (__v8qi) __Y;

  return (__m64) __builtin_ia32_pshufb (__lhs, __rhs);
}
/* PSIGNB, SSE-register form: both arguments are reinterpreted as __v16qi
   for __builtin_ia32_psignb128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_sign_epi8 (__m128i __X, __m128i __Y)
{
  __v16qi __lhs = (__v16qi) __X;
  __v16qi __rhs = (__v16qi) __Y;

  return (__m128i) __builtin_ia32_psignb128 (__lhs, __rhs);
}
/* PSIGNW, SSE-register form: both arguments are reinterpreted as __v8hi
   for __builtin_ia32_psignw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_sign_epi16 (__m128i __X, __m128i __Y)
{
  __v8hi __lhs = (__v8hi) __X;
  __v8hi __rhs = (__v8hi) __Y;

  return (__m128i) __builtin_ia32_psignw128 (__lhs, __rhs);
}
/* PSIGND, SSE-register form: both arguments are reinterpreted as __v4si
   for __builtin_ia32_psignd128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_sign_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __lhs = (__v4si) __X;
  __v4si __rhs = (__v4si) __Y;

  return (__m128i) __builtin_ia32_psignd128 (__lhs, __rhs);
}
/* PSIGNB, MMX-register form: both arguments are reinterpreted as __v8qi
   for __builtin_ia32_psignb and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_sign_pi8 (__m64 __X, __m64 __Y)
{
  __v8qi __lhs = (__v8qi) __X;
  __v8qi __rhs = (__v8qi) __Y;

  return (__m64) __builtin_ia32_psignb (__lhs, __rhs);
}
/* PSIGNW, MMX-register form: both arguments are reinterpreted as __v4hi
   for __builtin_ia32_psignw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_sign_pi16 (__m64 __X, __m64 __Y)
{
  __v4hi __lhs = (__v4hi) __X;
  __v4hi __rhs = (__v4hi) __Y;

  return (__m64) __builtin_ia32_psignw (__lhs, __rhs);
}
/* PSIGND, MMX-register form: both arguments are reinterpreted as __v2si
   for __builtin_ia32_psignd and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_sign_pi32 (__m64 __X, __m64 __Y)
{
  __v2si __lhs = (__v2si) __X;
  __v2si __rhs = (__v2si) __Y;

  return (__m64) __builtin_ia32_psignd (__lhs, __rhs);
}
/* PALIGNR wrappers.  These are macros rather than inline functions because
   the insn pattern only accepts a constant shift operand.  __N is scaled
   by 8 because that operand is expected to be a multiple of 8 (the pattern
   divides it by 8 again before emitting the instruction).  Every macro
   argument is parenthesized so that an expression argument binds as a
   unit under the cast.  */
#define _mm_alignr_epi8(__X, __Y, __N) \
  ((__m128i) __builtin_ia32_palignr128 ((__v2di) (__X), (__v2di) (__Y), (__N) * 8))
#define _mm_alignr_pi8(__X, __Y, __N) \
  ((__m64) __builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
/* PABSB, SSE-register form: __X is reinterpreted as __v16qi for
   __builtin_ia32_pabsb128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_abs_epi8 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m128i) __builtin_ia32_pabsb128 (__src);
}
/* PABSW, SSE-register form: __X is reinterpreted as __v8hi for
   __builtin_ia32_pabsw128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_abs_epi16 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m128i) __builtin_ia32_pabsw128 (__src);
}
/* PABSD, SSE-register form: __X is reinterpreted as __v4si for
   __builtin_ia32_pabsd128 and the result is returned as __m128i.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_abs_epi32 (__m128i __X)
{
  __v4si __src = (__v4si) __X;

  return (__m128i) __builtin_ia32_pabsd128 (__src);
}
/* PABSB, MMX-register form: __X is reinterpreted as __v8qi for
   __builtin_ia32_pabsb and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_abs_pi8 (__m64 __X)
{
  __v8qi __src = (__v8qi) __X;

  return (__m64) __builtin_ia32_pabsb (__src);
}
/* PABSW, MMX-register form: __X is reinterpreted as __v4hi for
   __builtin_ia32_pabsw and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_abs_pi16 (__m64 __X)
{
  __v4hi __src = (__v4hi) __X;

  return (__m64) __builtin_ia32_pabsw (__src);
}
/* PABSD, MMX-register form: __X is reinterpreted as __v2si for
   __builtin_ia32_pabsd and the result is returned as __m64.  */
static __inline __m64 __attribute__((__always_inline__))
_mm_abs_pi32 (__m64 __X)
{
  __v2si __src = (__v2si) __X;

  return (__m64) __builtin_ia32_pabsd (__src);
}
#endif /* __SSSE3__ */
#endif /* _TMMINTRIN_H_INCLUDED */
/* The SSSE3 intrinsics are defined exactly once, inside the
   _TMMINTRIN_H_INCLUDED guard above.  */

View File

@ -1243,7 +1243,9 @@ do { \
} while (0)
/* For backward source compatibility. */
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#endif /* __SSE__ */
#endif /* _XMMINTRIN_H_INCLUDED */

View File

@ -7211,6 +7211,52 @@ The following built-in functions are available when @option{-msse3} is used.
Generates the @code{movddup} machine instruction as a load from memory.
@end table
The following built-in functions are available when @option{-mssse3} is used.
All of them generate the machine instruction that is part of the name
with MMX registers.
@smallexample
v2si __builtin_ia32_phaddd (v2si, v2si)
v4hi __builtin_ia32_phaddw (v4hi, v4hi)
v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
v2si __builtin_ia32_phsubd (v2si, v2si)
v4hi __builtin_ia32_phsubw (v4hi, v4hi)
v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
v8qi __builtin_ia32_pshufb (v8qi, v8qi)
v8qi __builtin_ia32_psignb (v8qi, v8qi)
v2si __builtin_ia32_psignd (v2si, v2si)
v4hi __builtin_ia32_psignw (v4hi, v4hi)
long long __builtin_ia32_palignr (long long, long long, int)
v8qi __builtin_ia32_pabsb (v8qi)
v2si __builtin_ia32_pabsd (v2si)
v4hi __builtin_ia32_pabsw (v4hi)
@end smallexample
The following built-in functions are available when @option{-mssse3} is used.
All of them generate the machine instruction that is part of the name
with SSE registers.
@smallexample
v4si __builtin_ia32_phaddd128 (v4si, v4si)
v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
v4si __builtin_ia32_phsubd128 (v4si, v4si)
v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
v4si __builtin_ia32_psignd128 (v4si, v4si)
v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
v2di __builtin_ia32_palignr128 (v2di, v2di, int)
v16qi __builtin_ia32_pabsb128 (v16qi)
v4si __builtin_ia32_pabsd128 (v4si)
v8hi __builtin_ia32_pabsw128 (v8hi)
@end smallexample
The following built-in functions are available when @option{-m3dnow} is used.
All of them generate the machine instruction that is part of the name.

View File

@ -541,7 +541,7 @@ in the following sections.
\&\-mno\-fp\-ret\-in\-387 \-msoft\-float \-msvr3\-shlib
\&\-mno\-wide\-multiply \-mrtd \-malign\-double
\&\-mpreferred\-stack\-boundary=\fR\fInum\fR
\&\fB\-mmmx \-msse \-msse2 \-msse3 \-m3dnow
\&\fB\-mmmx \-msse \-msse2 \-msse3 \-mssse3 \-m3dnow
\&\-mthreads \-mno\-align\-stringops \-minline\-all\-stringops
\&\-mpush\-args \-maccumulate\-outgoing\-args \-m128bit\-long\-double
\&\-m96bit\-long\-double \-mregparm=\fR\fInum\fR \fB\-msseregparm
@ -8735,7 +8735,7 @@ Improved version of Intel Pentium4 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s
\&\s-1SSE2\s0 and \s-1SSE3\s0 instruction set support.
.IP "\fIcore2\fR" 4
.IX Item "core2"
Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0 and \s-1SSE3\s0
Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0 and \s-1SSSE3\s0
instruction set support.
.IP "\fIk6\fR" 4
.IX Item "k6"
@ -9057,15 +9057,20 @@ preferred alignment to \fB\-mpreferred\-stack\-boundary=2\fR.
.IX Item "-msse3"
.IP "\fB\-mno\-sse3\fR" 4
.IX Item "-mno-sse3"
.IP "\fB\-mssse3\fR" 4
.IX Item "-mssse3"
.IP "\fB\-mno\-ssse3\fR" 4
.IX Item "-mno-ssse3"
.IP "\fB\-m3dnow\fR" 4
.IX Item "-m3dnow"
.IP "\fB\-mno\-3dnow\fR" 4
.IX Item "-mno-3dnow"
.PD
These switches enable or disable the use of instructions in the \s-1MMX\s0,
\&\s-1SSE\s0, \s-1SSE2\s0 or 3DNow! extended instruction sets. These extensions are
also available as built-in functions: see \fBX86 Built-in Functions\fR,
for details of the functions enabled and disabled by these switches.
\&\s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0, \s-1SSSE3\s0 or 3DNow! extended instruction sets.
These extensions are also available as built-in functions: see
\fBX86 Built-in Functions\fR, for details of the functions enabled and
disabled by these switches.
.Sp
To have \s-1SSE/SSE2\s0 instructions generated automatically from floating-point
code (as opposed to 387 instructions), see \fB\-mfpmath=sse\fR.

View File

@ -533,7 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
-mno-wide-multiply -mrtd -malign-double @gol
-mpreferred-stack-boundary=@var{num} @gol
-mmmx -msse -msse2 -msse3 -m3dnow @gol
-mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
@ -9679,6 +9679,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-sse2
@item -msse3
@itemx -mno-sse3
@item -mssse3
@itemx -mno-ssse3
@item -m3dnow
@itemx -mno-3dnow
@opindex mmmx
@ -9688,9 +9690,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow
@opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX,
SSE, SSE2 or 3DNow! extended instruction sets. These extensions are
also available as built-in functions: see @ref{X86 Built-in Functions},
for details of the functions enabled and disabled by these switches.
SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and
disabled by these switches.
To have SSE/SSE2 instructions generated automatically from floating-point
code (as opposed to 387 instructions), see @option{-mfpmath=sse}.