Backport SSSE3 instruction set support to base gcc.
Enabled by default for -march=core2.

Obtained from: gcc 4.3 (rev. 117958, 121687, 121726, 123639; GPLv2)
MFC after: 2 weeks
This commit is contained in:
parent
03f06701bb
commit
36f9eb3065
@ -268,11 +268,13 @@ xscale-*-*)
|
||||
;;
|
||||
i[34567]86-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h"
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h"
|
||||
need_64bit_hwint=yes
|
||||
;;
|
||||
ia64-*-*)
|
||||
|
@ -1511,16 +1511,24 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
||||
case OPT_msse:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~(MASK_SSE2 | MASK_SSE3);
|
||||
target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
|
||||
target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
|
||||
target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse2:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~MASK_SSE3;
|
||||
target_flags_explicit |= MASK_SSE3;
|
||||
target_flags &= ~(MASK_SSE3 | MASK_SSSE3);
|
||||
target_flags_explicit |= MASK_SSE3 | MASK_SSSE3;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse3:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~MASK_SSSE3;
|
||||
target_flags_explicit |= MASK_SSSE3;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -1589,7 +1597,8 @@ override_options (void)
|
||||
PTA_PREFETCH_SSE = 16,
|
||||
PTA_3DNOW = 32,
|
||||
PTA_3DNOW_A = 64,
|
||||
PTA_64BIT = 128
|
||||
PTA_64BIT = 128,
|
||||
PTA_SSSE3 = 256
|
||||
} flags;
|
||||
}
|
||||
const processor_alias_table[] =
|
||||
@ -1617,7 +1626,7 @@ override_options (void)
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
|
||||
| PTA_MMX | PTA_PREFETCH_SSE},
|
||||
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
|
||||
| PTA_64BIT | PTA_MMX
|
||||
| PTA_PREFETCH_SSE},
|
||||
{"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
|
||||
@ -1811,6 +1820,9 @@ override_options (void)
|
||||
if (processor_alias_table[i].flags & PTA_SSE3
|
||||
&& !(target_flags_explicit & MASK_SSE3))
|
||||
target_flags |= MASK_SSE3;
|
||||
if (processor_alias_table[i].flags & PTA_SSSE3
|
||||
&& !(target_flags_explicit & MASK_SSSE3))
|
||||
target_flags |= MASK_SSSE3;
|
||||
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
|
||||
x86_prefetch_sse = true;
|
||||
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
|
||||
@ -1987,6 +1999,10 @@ override_options (void)
|
||||
if (!TARGET_80387)
|
||||
target_flags |= MASK_NO_FANCY_MATH_387;
|
||||
|
||||
/* Turn on SSE3 builtins for -mssse3. */
|
||||
if (TARGET_SSSE3)
|
||||
target_flags |= MASK_SSE3;
|
||||
|
||||
/* Turn on SSE2 builtins for -msse3. */
|
||||
if (TARGET_SSE3)
|
||||
target_flags |= MASK_SSE2;
|
||||
@ -14693,6 +14709,41 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_MONITOR,
|
||||
IX86_BUILTIN_MWAIT,
|
||||
|
||||
/* SSSE3. */
|
||||
IX86_BUILTIN_PHADDW,
|
||||
IX86_BUILTIN_PHADDD,
|
||||
IX86_BUILTIN_PHADDSW,
|
||||
IX86_BUILTIN_PHSUBW,
|
||||
IX86_BUILTIN_PHSUBD,
|
||||
IX86_BUILTIN_PHSUBSW,
|
||||
IX86_BUILTIN_PMADDUBSW,
|
||||
IX86_BUILTIN_PMULHRSW,
|
||||
IX86_BUILTIN_PSHUFB,
|
||||
IX86_BUILTIN_PSIGNB,
|
||||
IX86_BUILTIN_PSIGNW,
|
||||
IX86_BUILTIN_PSIGND,
|
||||
IX86_BUILTIN_PALIGNR,
|
||||
IX86_BUILTIN_PABSB,
|
||||
IX86_BUILTIN_PABSW,
|
||||
IX86_BUILTIN_PABSD,
|
||||
|
||||
IX86_BUILTIN_PHADDW128,
|
||||
IX86_BUILTIN_PHADDD128,
|
||||
IX86_BUILTIN_PHADDSW128,
|
||||
IX86_BUILTIN_PHSUBW128,
|
||||
IX86_BUILTIN_PHSUBD128,
|
||||
IX86_BUILTIN_PHSUBSW128,
|
||||
IX86_BUILTIN_PMADDUBSW128,
|
||||
IX86_BUILTIN_PMULHRSW128,
|
||||
IX86_BUILTIN_PSHUFB128,
|
||||
IX86_BUILTIN_PSIGNB128,
|
||||
IX86_BUILTIN_PSIGNW128,
|
||||
IX86_BUILTIN_PSIGND128,
|
||||
IX86_BUILTIN_PALIGNR128,
|
||||
IX86_BUILTIN_PABSB128,
|
||||
IX86_BUILTIN_PABSW128,
|
||||
IX86_BUILTIN_PABSD128,
|
||||
|
||||
IX86_BUILTIN_VEC_INIT_V2SI,
|
||||
IX86_BUILTIN_VEC_INIT_V4HI,
|
||||
IX86_BUILTIN_VEC_INIT_V8QI,
|
||||
@ -15034,7 +15085,33 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
|
||||
{ MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
|
||||
{ MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
|
||||
{ MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
|
||||
{ MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
|
||||
|
||||
/* SSSE3 */
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_1arg[] =
|
||||
@ -15081,6 +15158,14 @@ static const struct builtin_description bdesc_1arg[] =
|
||||
/* SSE3 */
|
||||
{ MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
|
||||
{ MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
|
||||
|
||||
/* SSSE3 */
|
||||
{ MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
|
||||
};
|
||||
|
||||
static void
|
||||
@ -15215,6 +15300,16 @@ ix86_init_mmx_sse_builtins (void)
|
||||
/* Normal vector unops. */
|
||||
tree v4sf_ftype_v4sf
|
||||
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
|
||||
tree v16qi_ftype_v16qi
|
||||
= build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
|
||||
tree v8hi_ftype_v8hi
|
||||
= build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
|
||||
tree v4si_ftype_v4si
|
||||
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
|
||||
tree v8qi_ftype_v8qi
|
||||
= build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
|
||||
tree v4hi_ftype_v4hi
|
||||
= build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
|
||||
|
||||
/* Normal vector binops. */
|
||||
tree v4sf_ftype_v4sf_v4sf
|
||||
@ -15234,6 +15329,12 @@ ix86_init_mmx_sse_builtins (void)
|
||||
long_long_unsigned_type_node,
|
||||
long_long_unsigned_type_node, NULL_TREE);
|
||||
|
||||
tree di_ftype_di_di_int
|
||||
= build_function_type_list (long_long_unsigned_type_node,
|
||||
long_long_unsigned_type_node,
|
||||
long_long_unsigned_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
|
||||
tree v2si_ftype_v2sf
|
||||
= build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
|
||||
tree v2sf_ftype_v2si
|
||||
@ -15335,6 +15436,9 @@ ix86_init_mmx_sse_builtins (void)
|
||||
tree v2di_ftype_v2di_int
|
||||
= build_function_type_list (V2DI_type_node,
|
||||
V2DI_type_node, integer_type_node, NULL_TREE);
|
||||
tree v2di_ftype_v2di_v2di_int
|
||||
= build_function_type_list (V2DI_type_node, V2DI_type_node,
|
||||
V2DI_type_node, integer_type_node, NULL_TREE);
|
||||
tree v4si_ftype_v4si_int
|
||||
= build_function_type_list (V4SI_type_node,
|
||||
V4SI_type_node, integer_type_node, NULL_TREE);
|
||||
@ -15451,6 +15555,50 @@ ix86_init_mmx_sse_builtins (void)
|
||||
def_builtin (d->mask, d->name, type, d->code);
|
||||
}
|
||||
|
||||
/* Add all builtins that are more or less simple operations on 1 operand. */
|
||||
for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
|
||||
{
|
||||
enum machine_mode mode;
|
||||
tree type;
|
||||
|
||||
if (d->name == 0)
|
||||
continue;
|
||||
mode = insn_data[d->icode].operand[1].mode;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V16QImode:
|
||||
type = v16qi_ftype_v16qi;
|
||||
break;
|
||||
case V8HImode:
|
||||
type = v8hi_ftype_v8hi;
|
||||
break;
|
||||
case V4SImode:
|
||||
type = v4si_ftype_v4si;
|
||||
break;
|
||||
case V2DFmode:
|
||||
type = v2df_ftype_v2df;
|
||||
break;
|
||||
case V4SFmode:
|
||||
type = v4sf_ftype_v4sf;
|
||||
break;
|
||||
case V8QImode:
|
||||
type = v8qi_ftype_v8qi;
|
||||
break;
|
||||
case V4HImode:
|
||||
type = v4hi_ftype_v4hi;
|
||||
break;
|
||||
case V2SImode:
|
||||
type = v2si_ftype_v2si;
|
||||
break;
|
||||
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
def_builtin (d->mask, d->name, type, d->code);
|
||||
}
|
||||
|
||||
/* Add the remaining MMX insns with somewhat more complicated types. */
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
|
||||
@ -15650,6 +15798,12 @@ ix86_init_mmx_sse_builtins (void)
|
||||
def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
|
||||
v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
|
||||
|
||||
/* SSSE3. */
|
||||
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
|
||||
v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
|
||||
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
|
||||
IX86_BUILTIN_PALIGNR);
|
||||
|
||||
/* Access to the vec_init patterns. */
|
||||
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
@ -16148,7 +16302,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
tree arglist = TREE_OPERAND (exp, 1);
|
||||
tree arg0, arg1, arg2;
|
||||
rtx op0, op1, op2, pat;
|
||||
enum machine_mode tmode, mode0, mode1, mode2;
|
||||
enum machine_mode tmode, mode0, mode1, mode2, mode3;
|
||||
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
|
||||
|
||||
switch (fcode)
|
||||
@ -16618,6 +16772,52 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
|
||||
target, 1);
|
||||
|
||||
case IX86_BUILTIN_PALIGNR:
|
||||
case IX86_BUILTIN_PALIGNR128:
|
||||
if (fcode == IX86_BUILTIN_PALIGNR)
|
||||
{
|
||||
icode = CODE_FOR_ssse3_palignrdi;
|
||||
mode = DImode;
|
||||
}
|
||||
else
|
||||
{
|
||||
icode = CODE_FOR_ssse3_palignrti;
|
||||
mode = V2DImode;
|
||||
}
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
|
||||
tmode = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
mode3 = insn_data[icode].operand[3].mode;
|
||||
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
|
||||
{
|
||||
op0 = copy_to_reg (op0);
|
||||
op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
|
||||
}
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
|
||||
{
|
||||
op1 = copy_to_reg (op1);
|
||||
op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
|
||||
}
|
||||
if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
|
||||
{
|
||||
error ("shift must be an immediate");
|
||||
return const0_rtx;
|
||||
}
|
||||
target = gen_reg_rtx (mode);
|
||||
pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
|
||||
op0, op1, op2);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_VEC_INIT_V2SI:
|
||||
case IX86_BUILTIN_VEC_INIT_V4HI:
|
||||
case IX86_BUILTIN_VEC_INIT_V8QI:
|
||||
|
@ -418,6 +418,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
builtin_define ("__SSE2__"); \
|
||||
if (TARGET_SSE3) \
|
||||
builtin_define ("__SSE3__"); \
|
||||
if (TARGET_SSSE3) \
|
||||
builtin_define ("__SSSE3__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE) \
|
||||
builtin_define ("__SSE_MATH__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE2) \
|
||||
|
@ -148,6 +148,11 @@
|
||||
(UNSPEC_SP_TEST 101)
|
||||
(UNSPEC_SP_TLS_SET 102)
|
||||
(UNSPEC_SP_TLS_TEST 103)
|
||||
|
||||
; SSSE3
|
||||
(UNSPEC_PSHUFB 120)
|
||||
(UNSPEC_PSIGN 121)
|
||||
(UNSPEC_PALIGNR 122)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
@ -20948,6 +20953,6 @@
|
||||
}
|
||||
[(set_attr "type" "multi")])
|
||||
|
||||
(include "sse.md")
|
||||
(include "mmx.md")
|
||||
(include "sse.md")
|
||||
(include "sync.md")
|
||||
|
@ -197,6 +197,10 @@ msse3
|
||||
Target Report Mask(SSE3)
|
||||
Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
|
||||
|
||||
mssse3
|
||||
Target Report Mask(SSSE3)
|
||||
Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
|
||||
|
||||
msseregparm
|
||||
Target RejectNegative Mask(SSEREGPARM)
|
||||
Use SSE register passing conventions for SF and DF mode
|
||||
|
@ -3949,3 +3949,578 @@
|
||||
;; zero extended to 64bit, we only need to set up 32bit registers.
|
||||
"monitor"
|
||||
[(set_attr "length" "3")])
|
||||
|
||||
;; SSSE3
|
||||
;; PHADDW, V8HI form: horizontal add of adjacent 16-bit elements.
;; Result elements 0-3 are the pairwise sums taken from operand 1 (tied
;; to the destination); elements 4-7 are the pairwise sums from operand 2.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairs from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHADDW, V4HI form (operands use the "y" register constraint).
;; Result elements 0-1 are the pairwise sums from operand 1 (tied to the
;; destination); elements 2-3 are the pairwise sums from operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs from operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PHADDD, V4SI form: horizontal add of adjacent 32-bit elements.
;; Result elements 0-1 are the pairwise sums from operand 1 (tied to the
;; destination); elements 2-3 are the pairwise sums from operand 2.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairs from operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHADDD, V2SI form (operands use the "y" register constraint).
;; Result element 0 is the sum of operand 1's two elements (operand 1 is
;; tied to the destination); element 1 is the sum of operand 2's two.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plus:SI
            (vec_select:SI (match_operand:V2SI 1 "register_operand" "0")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plus:SI
            (vec_select:SI (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PHADDSW, V8HI form: horizontal add of adjacent 16-bit elements using
;; ss_plus (signed-saturating addition).
;; Result elements 0-3 come from operand 1 (tied to the destination);
;; elements 4-7 come from operand 2.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairs from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHADDSW, V4HI form (operands use the "y" register constraint):
;; horizontal add of adjacent 16-bit elements using ss_plus
;; (signed-saturating addition).
;; Result elements 0-1 come from operand 1 (tied to the destination);
;; elements 2-3 come from operand 2.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs from operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PHSUBW, V8HI form: horizontal subtract of adjacent 16-bit elements.
;; Each result element is (even element - following odd element).
;; Result elements 0-3 come from operand 1 (tied to the destination);
;; elements 4-7 come from operand 2.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairs from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHSUBW, V4HI form (operands use the "y" register constraint).
;; Each result element is (even element - following odd element).
;; Result elements 0-1 come from operand 1 (tied to the destination);
;; elements 2-3 come from operand 2.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs from operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PHSUBD, V4SI form: horizontal subtract of adjacent 32-bit elements.
;; Each result element is (even element - following odd element).
;; Result elements 0-1 come from operand 1 (tied to the destination);
;; elements 2-3 come from operand 2.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairs from operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHSUBD, V2SI form (operands use the "y" register constraint).
;; Result element 0 is operand 1's element 0 minus its element 1
;; (operand 1 is tied to the destination); element 1 is the same
;; difference computed on operand 2.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI (match_operand:V2SI 1 "register_operand" "0")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PHSUBSW, V8HI form: horizontal subtract of adjacent 16-bit elements
;; using ss_minus (signed-saturating subtraction).
;; Each result element is (even element - following odd element).
;; Result elements 0-3 come from operand 1 (tied to the destination);
;; elements 4-7 come from operand 2.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairs from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PHSUBSW, V4HI form (operands use the "y" register constraint):
;; horizontal subtract of adjacent 16-bit elements using ss_minus
;; (signed-saturating subtraction).
;; Result elements 0-1 come from operand 1 (tied to the destination);
;; elements 2-3 come from operand 2.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs from operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; PMADDUBSW, V8HI form: multiply the bytes of operand 1, zero-extended
;; to 16 bits, by the corresponding bytes of operand 2, sign-extended to
;; 16 bits, then add the product of each even-indexed byte pair to the
;; product of the following odd-indexed pair using ss_plus
;; (signed-saturating addition).
;;
;; Every vec_select below picks 8 byte elements out of a V16QI operand,
;; so its result mode must be V8QI.
;;
;; Operand 1 is tied to the destination and is NOT commutative with
;; operand 2: the two inputs are extended differently (zero vs. sign),
;; so swapping them would change the result.  Hence no "%" modifier.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; PMADDUBSW, V4HI form (operands use the "y" register constraint):
;; multiply the bytes of operand 1, zero-extended to 16 bits, by the
;; corresponding bytes of operand 2, sign-extended to 16 bits, then add
;; the product of each even-indexed byte pair to the product of the
;; following odd-indexed pair using ss_plus (signed-saturating addition).
;;
;; Every vec_select below picks 4 byte elements out of a V8QI operand,
;; so its result mode must be V4QI.
;;
;; Operand 1 is tied to the destination and is NOT commutative with
;; operand 2: the two inputs are extended differently (zero vs. sign),
;; so swapping them would change the result.  Hence no "%" modifier.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; pmulhrsw on SSE registers.  As written in the RTL below: both V8HI inputs
;; are sign-extended to V8SI and multiplied, the product is shifted right
;; (logically) by 14, a vector of ones is added, the sum is shifted right by
;; one more bit and the result is truncated back to V8HI.
;; Both inputs are treated identically, hence the commutative "%0".
(define_insn "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; pmulhrsw on MMX registers.  As written in the RTL below: both V4HI inputs
;; are sign-extended to V4SI and multiplied, the product is shifted right
;; (logically) by 14, a vector of ones is added, the sum is shifted right by
;; one more bit and the result is truncated back to V4HI.
;; Both inputs are treated identically, hence the commutative "%0".
(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; pshufb on SSE registers.  The operation is not described in RTL terms; it
;; is an opaque UNSPEC_PSHUFB of the two V16QI inputs.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; pshufb on MMX registers.  The operation is not described in RTL terms; it
;; is an opaque UNSPEC_PSHUFB of the two V8QI inputs.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; psign{b,w,d} on SSE registers, one pattern per mode in SSEMODE124.  The
;; operation is an opaque UNSPEC_PSIGN of the two inputs; operand 1 is tied
;; to the destination.  The mnemonic suffix comes from <ssevecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; psign{b,w,d} on MMX registers, one pattern per mode in MMXMODEI.  The
;; operation is an opaque UNSPEC_PSIGN of the two inputs; operand 1 is tied
;; to the destination.  The mnemonic suffix comes from <mmxvecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; palignr on SSE registers, modelled as an opaque UNSPEC_PALIGNR on TImode
;; values.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing it as the instruction's immediate.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0")
           (match_operand:TI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the operand to the value the assembler expects.  */
  HOST_WIDE_INT scaled = INTVAL (operands[3]);
  operands[3] = GEN_INT (scaled / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; palignr on MMX registers, modelled as an opaque UNSPEC_PALIGNR on DImode
;; values.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing it as the instruction's immediate.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the operand to the value the assembler expects.  */
  HOST_WIDE_INT scaled = INTVAL (operands[3]);
  operands[3] = GEN_INT (scaled / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "DI")])
|
||||
|
||||
;; Element-wise absolute value on SSE registers (pabs{b,w,d}), one pattern
;; per mode in SSEMODE124.  The source may be a register or memory and is
;; not tied to the destination.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
|
||||
|
||||
;; Element-wise absolute value on MMX registers (pabs{b,w,d}), one pattern
;; per mode in MMXMODEI.  The source may be a register or memory and is not
;; tied to the destination.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])
|
||||
|
448
contrib/gcc/config/i386/tmmintrin.h
Normal file
448
contrib/gcc/config/i386/tmmintrin.h
Normal file
@ -0,0 +1,448 @@
|
||||
/* Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.1. */
|
||||
|
||||
#ifndef _TMMINTRIN_H_INCLUDED
|
||||
#define _TMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
/* 128-bit palignr.  Implemented as a macro rather than an inline function;
   __N is multiplied by 8 before being handed to the builtin.  Every macro
   argument is parenthesised so that an expression argument is cast as a
   whole instead of only its first token.  */
#define _mm_alignr_epi8(__X, __Y, __N) \
  ((__m128i) __builtin_ia32_palignr128 ((__v2di) (__X), (__v2di) (__Y), (__N) * 8))
|
||||
|
||||
#define _mm_alignr_pi8(__X, __Y, __N) \
|
||||
((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi8 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi8 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi16 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi32 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
|
||||
}
|
||||
|
||||
#endif /* __SSSE3__ */
|
||||
|
||||
#endif /* _TMMINTRIN_H_INCLUDED */
|
||||
/* Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.1. */
|
||||
|
||||
#ifndef _TMMINTRIN_H_INCLUDED
|
||||
#define _TMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadd_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hadds_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadd_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hadds_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsub_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsub_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_sign_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi8 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi16 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_sign_pi32 (__m64 __X, __m64 __Y)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
|
||||
}
|
||||
|
||||
/* 128-bit palignr.  Implemented as a macro rather than an inline function;
   __N is multiplied by 8 before being handed to the builtin.  Every macro
   argument is parenthesised so that an expression argument is cast as a
   whole instead of only its first token.  */
#define _mm_alignr_epi8(__X, __Y, __N) \
  ((__m128i) __builtin_ia32_palignr128 ((__v2di) (__X), (__v2di) (__Y), (__N) * 8))
|
||||
|
||||
#define _mm_alignr_pi8(__X, __Y, __N) \
|
||||
((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8))
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi8 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_abs_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi8 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi16 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m64 __attribute__((__always_inline__))
|
||||
_mm_abs_pi32 (__m64 __X)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
|
||||
}
|
||||
|
||||
#endif /* __SSSE3__ */
|
||||
|
||||
#endif /* _TMMINTRIN_H_INCLUDED */
|
@ -1243,7 +1243,9 @@ do { \
|
||||
} while (0)
|
||||
|
||||
/* For backward source compatibility. */
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#endif /* __SSE__ */
|
||||
#endif /* _XMMINTRIN_H_INCLUDED */
|
||||
|
@ -7211,6 +7211,52 @@ The following built-in functions are available when @option{-msse3} is used.
|
||||
Generates the @code{movddup} machine instruction as a load from memory.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-mssse3} is used.
|
||||
All of them generate the machine instruction that is part of the name
|
||||
with MMX registers.
|
||||
|
||||
@smallexample
|
||||
v2si __builtin_ia32_phaddd (v2si, v2si)
|
||||
v4hi __builtin_ia32_phaddw (v4hi, v4hi)
|
||||
v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
|
||||
v2si __builtin_ia32_phsubd (v2si, v2si)
|
||||
v4hi __builtin_ia32_phsubw (v4hi, v4hi)
|
||||
v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
|
||||
v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
|
||||
v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
|
||||
v8qi __builtin_ia32_pshufb (v8qi, v8qi)
|
||||
v8qi __builtin_ia32_psignb (v8qi, v8qi)
|
||||
v2si __builtin_ia32_psignd (v2si, v2si)
|
||||
v4hi __builtin_ia32_psignw (v4hi, v4hi)
|
||||
long long __builtin_ia32_palignr (long long, long long, int)
|
||||
v8qi __builtin_ia32_pabsb (v8qi)
|
||||
v2si __builtin_ia32_pabsd (v2si)
|
||||
v4hi __builtin_ia32_pabsw (v4hi)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-mssse3} is used.
|
||||
All of them generate the machine instruction that is part of the name
|
||||
with SSE registers.
|
||||
|
||||
@smallexample
|
||||
v4si __builtin_ia32_phaddd128 (v4si, v4si)
|
||||
v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
|
||||
v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
|
||||
v4si __builtin_ia32_phsubd128 (v4si, v4si)
|
||||
v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
|
||||
v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
|
||||
v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
|
||||
v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
|
||||
v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
|
||||
v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
|
||||
v4si __builtin_ia32_psignd128 (v4si, v4si)
|
||||
v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
|
||||
v2di __builtin_ia32_palignr128 (v2di, v2di, int)
|
||||
v16qi __builtin_ia32_pabsb128 (v16qi)
|
||||
v4si __builtin_ia32_pabsd128 (v4si)
|
||||
v8hi __builtin_ia32_pabsw128 (v8hi)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-m3dnow} is used.
|
||||
All of them generate the machine instruction that is part of the name.
|
||||
|
||||
|
@ -541,7 +541,7 @@ in the following sections.
|
||||
\&\-mno\-fp\-ret\-in\-387 \-msoft\-float \-msvr3\-shlib
|
||||
\&\-mno\-wide\-multiply \-mrtd \-malign\-double
|
||||
\&\-mpreferred\-stack\-boundary=\fR\fInum\fR
|
||||
\&\fB\-mmmx \-msse \-msse2 \-msse3 \-m3dnow
|
||||
\&\fB\-mmmx \-msse \-msse2 \-msse3 \-mssse3 \-m3dnow
|
||||
\&\-mthreads \-mno\-align\-stringops \-minline\-all\-stringops
|
||||
\&\-mpush\-args \-maccumulate\-outgoing\-args \-m128bit\-long\-double
|
||||
\&\-m96bit\-long\-double \-mregparm=\fR\fInum\fR \fB\-msseregparm
|
||||
@ -8735,7 +8735,7 @@ Improved version of Intel Pentium4 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s
|
||||
\&\s-1SSE2\s0 and \s-1SSE3\s0 instruction set support.
|
||||
.IP "\fIcore2\fR" 4
|
||||
.IX Item "core2"
|
||||
Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0 and \s-1SSE3\s0
|
||||
Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0 and \s-1SSSE3\s0
|
||||
instruction set support.
|
||||
.IP "\fIk6\fR" 4
|
||||
.IX Item "k6"
|
||||
@ -9057,15 +9057,20 @@ preferred alignment to \fB\-mpreferred\-stack\-boundary=2\fR.
|
||||
.IX Item "-msse3"
|
||||
.IP "\fB\-mno\-sse3\fR" 4
|
||||
.IX Item "-mno-sse3"
|
||||
.IP "\fB\-mssse3\fR" 4
|
||||
.IX Item "-mssse3"
|
||||
.IP "\fB\-mno\-ssse3\fR" 4
|
||||
.IX Item "-mno-ssse3"
|
||||
.IP "\fB\-m3dnow\fR" 4
|
||||
.IX Item "-m3dnow"
|
||||
.IP "\fB\-mno\-3dnow\fR" 4
|
||||
.IX Item "-mno-3dnow"
|
||||
.PD
|
||||
These switches enable or disable the use of instructions in the \s-1MMX\s0,
|
||||
\&\s-1SSE\s0, \s-1SSE2\s0 or 3DNow! extended instruction sets. These extensions are
|
||||
also available as built-in functions: see \fBX86 Built-in Functions\fR,
|
||||
for details of the functions enabled and disabled by these switches.
|
||||
\&\s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0, \s-1SSSE3\s0 or 3DNow! extended instruction sets.
|
||||
These extensions are also available as built-in functions: see
|
||||
\fBX86 Built-in Functions\fR, for details of the functions enabled and
|
||||
disabled by these switches.
|
||||
.Sp
|
||||
To have \s-1SSE/SSE2\s0 instructions generated automatically from floating-point
|
||||
code (as opposed to 387 instructions), see \fB\-mfpmath=sse\fR.
|
||||
|
@ -533,7 +533,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol
|
||||
-mno-wide-multiply -mrtd -malign-double @gol
|
||||
-mpreferred-stack-boundary=@var{num} @gol
|
||||
-mmmx -msse -msse2 -msse3 -m3dnow @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol
|
||||
-mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
|
||||
@ -9679,6 +9679,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@itemx -mno-sse2
|
||||
@item -msse3
|
||||
@itemx -mno-sse3
|
||||
@item -mssse3
|
||||
@itemx -mno-ssse3
|
||||
@item -m3dnow
|
||||
@itemx -mno-3dnow
|
||||
@opindex mmmx
|
||||
@ -9688,9 +9690,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@opindex m3dnow
|
||||
@opindex mno-3dnow
|
||||
These switches enable or disable the use of instructions in the MMX,
|
||||
SSE, SSE2 or 3DNow! extended instruction sets. These extensions are
|
||||
also available as built-in functions: see @ref{X86 Built-in Functions},
|
||||
for details of the functions enabled and disabled by these switches.
|
||||
SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets.
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{X86 Built-in Functions}, for details of the functions enabled and
|
||||
disabled by these switches.
|
||||
|
||||
To have SSE/SSE2 instructions generated automatically from floating-point
|
||||
code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
|
||||
|
Loading…
x
Reference in New Issue
Block a user