From 003f0fa63f779bf3e5c2e43351a5c1dd91a7e627 Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Tue, 3 Sep 2013 17:33:29 +0000 Subject: [PATCH] add support to gcc for AES and PCLMUL intrinsics... This addes the -maes option, but not the -mpclmul option as I ran out of bits in the 32 bit flags field... You can -D__PCLMUL__ to get this, but it won't be compatible w/ clang and modern gcc... Reviewed by: -current, -toolchain --- contrib/gcc/config/i386/i386.c | 12 +++++ contrib/gcc/config/i386/i386.h | 2 + contrib/gcc/config/i386/i386.opt | 4 ++ contrib/gcc/doc/invoke.texi | 12 +++-- contrib/gcc/opth-gen.awk | 2 +- gnu/usr.bin/cc/include/Makefile | 3 +- gnu/usr.bin/cc/include/__wmmintrin_aes.h | 54 +++++++++++++++++++++ gnu/usr.bin/cc/include/__wmmintrin_pclmul.h | 53 ++++++++++++++++++++ 8 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 gnu/usr.bin/cc/include/__wmmintrin_aes.h create mode 100644 gnu/usr.bin/cc/include/__wmmintrin_pclmul.h diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 0187cb25d0fc..1598ae91ecaa 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -1684,6 +1684,14 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_maes: + if (!value) + { + target_flags &= ~MASK_AES; + target_flags_explicit |= MASK_AES; + } + return true; + default: return true; } @@ -2187,6 +2195,10 @@ override_options (void) if (TARGET_SSE3) target_flags |= MASK_SSE2; + /* Turn on SSE2 builtins for -maes. */ + if (TARGET_AES) + target_flags |= MASK_SSE2; + /* Turn on SSE builtins for -msse2. */ if (TARGET_SSE2) target_flags |= MASK_SSE; diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h index e918fc309ca9..1394fbabfa71 100644 --- a/contrib/gcc/config/i386/i386.h +++ b/contrib/gcc/config/i386/i386.h @@ -428,6 +428,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__SSSE3__"); \ if (TARGET_SSE4A) \ builtin_define ("__SSE4A__"); \ + if (TARGET_AES) \ + builtin_define ("__AES__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ diff --git a/contrib/gcc/config/i386/i386.opt b/contrib/gcc/config/i386/i386.opt index fa73e177e7b2..4de75698f19e 100644 --- a/contrib/gcc/config/i386/i386.opt +++ b/contrib/gcc/config/i386/i386.opt @@ -205,6 +205,10 @@ msse4a Target Report Mask(SSE4A) Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation +maes +Target Report Mask(AES) +Support AES built-in functions and code generation. + mpopcnt Target Report Mask(POPCNT) Support code generation of popcount instruction for popcount built-ins diff --git a/contrib/gcc/doc/invoke.texi b/contrib/gcc/doc/invoke.texi index 0f66c3dc28b6..13b9ae93d957 100644 --- a/contrib/gcc/doc/invoke.texi +++ b/contrib/gcc/doc/invoke.texi @@ -513,7 +513,7 @@ in the following sections. -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol +-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm -maes @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol @@ -9367,6 +9367,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-popcnt @item -mabm @itemx -mno-abm +@item -maes +@itemx -mno-aes @opindex mmmx @opindex mno-mmx @opindex msse @@ -9374,10 +9376,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets. -These extensions are also available as built-in functions: see -@ref{X86 Built-in Functions}, for details of the functions enabled and -disabled by these switches. +SSE, SSE2, SSE3, SSSE3, SSE4A, ABM, AES or 3DNow! extended +instruction sets. These extensions are also available as built-in +functions: see @ref{X86 Built-in Functions}, for details of the functions +enabled and disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}. diff --git a/contrib/gcc/opth-gen.awk b/contrib/gcc/opth-gen.awk index e7ffc1a646eb..84b3170935fc 100644 --- a/contrib/gcc/opth-gen.awk +++ b/contrib/gcc/opth-gen.awk @@ -87,7 +87,7 @@ for (i = 0; i < n_extra_masks; i++) { } for (var in masknum) { - if (masknum[var] > 31) { + if (masknum[var] > 32) { if (var == "") print "#error too many target masks" else diff --git a/gnu/usr.bin/cc/include/Makefile b/gnu/usr.bin/cc/include/Makefile index 2b04b4cb1df1..c48975e8f307 100644 --- a/gnu/usr.bin/cc/include/Makefile +++ b/gnu/usr.bin/cc/include/Makefile @@ -6,11 +6,12 @@ INCSDIR=${INCLUDEDIR}/gcc/${GCCVER} -.PATH: ${GCCDIR}/config/${GCC_CPU} +.PATH: ${GCCDIR}/config/${GCC_CPU} ${.CURDIR}/../../../../contrib/llvm/tools/clang/lib/Headers .if ${TARGET_ARCH} == "i386" || ${TARGET_ARCH} == "amd64" INCS= ammintrin.h emmintrin.h mmintrin.h mm3dnow.h pmmintrin.h \ tmmintrin.h xmmintrin.h mm_malloc.h +INCS+= wmmintrin.h __wmmintrin_aes.h __wmmintrin_pclmul.h .elif ${TARGET_ARCH} == "ia64" INCS= ia64intrin.h .elif ${TARGET_ARCH} == "arm" diff --git a/gnu/usr.bin/cc/include/__wmmintrin_aes.h b/gnu/usr.bin/cc/include/__wmmintrin_aes.h new file mode 100644 index 000000000000..ff8a3457948c --- /dev/null +++ b/gnu/usr.bin/cc/include/__wmmintrin_aes.h @@ -0,0 +1,54 @@ +/*- + * Copyright 2013 John-Mark Gurney + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _WMMINTRIN_AES_H_ +#define _WMMINTRIN_AES_H_ + +#include + +#define MAKE_AES(name) \ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) \ +_mm_## name ##_si128(__m128i __V, __m128i __R) \ +{ \ + __m128i v = __V; \ + \ + __asm__ (#name " %2, %0": "=x" (v): "0" (v), "xm" (__R)); \ + \ + return v; \ +} + +MAKE_AES(aesimc) +MAKE_AES(aesenc) +MAKE_AES(aesenclast) +MAKE_AES(aesdec) +MAKE_AES(aesdeclast) + +#undef MAKE_AES + +#endif /* _WMMINTRIN_AES_H_ */ diff --git a/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h b/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h new file mode 100644 index 000000000000..5bebd81be261 --- /dev/null +++ b/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h @@ -0,0 +1,53 @@ +/*- + * Copyright 2013 John-Mark Gurney + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _WMMINTRIN_PCLMUL_H_ +#define _WMMINTRIN_PCLMUL_H_ + +#include + +/* + * c selects which parts of a and b to multiple: + * 0x00: a[ 63: 0] * b[ 63: 0] + * 0x01: a[127:64] * b[ 63: 0] + * 0x10: a[ 63: 0] * b[127:64] + * 0x11: a[127:64] * b[127:64] + */ +#define _mm_clmulepi64_si128(a, b, c) \ +({ \ + __m128i _a = (a); \ + __m128i _b = (b); \ + \ + __asm__("pclmulqdq %3, %2, %0": "=x" (_a): "0" (_a), "xm" (_b), \ + "i" (c)); \ + \ + _a; \ +}) + +#endif /* _WMMINTRIN_PCLMUL_H_ */