From 0937df81ca9895fcef7b43e9e10bff718e339568 Mon Sep 17 00:00:00 2001 From: Kris Kennaway Date: Mon, 19 Feb 2001 03:59:05 +0000 Subject: [PATCH] Introduce support for using OpenSSL ASM optimizations. This is done through the use of a new build directive, MACHINE_CPU, which contains a list of the CPU generations/features for which optimizations are desired. This feature will be extended to cover the ports tree in the future. Currently OpenSSL provides optimizations for i386, i586 and i686-class CPUs. Currently it has not been tested on an i386 or i486. Teach make(1) to provide sensible defaults for MACHINE_CPU if it is not defined (namely, the lowest common denominator CPU we support for each architecture). Currently this is i386 for the i386 architecture and ev4 for the alpha. sys.mk also sets the variable as a last resort for consistency with MACHINE_ARCH and bootstrapping from very old versions of make. Benchmarks show a significant speed increase even in the i386 case, with additional improvements for i586 and i686 systems. For maximum performance define MACHINE_CPU=i686 i586 i386 in /etc/make.conf. Based on a patch submitted by: Mike Silbersack Reviewed by: current --- etc/defaults/make.conf | 15 ++++- secure/lib/libcrypto/Makefile | 92 ++++++++++++++++++++++++++++--- secure/lib/libcrypto/Makefile.inc | 9 ++- share/examples/etc/make.conf | 15 ++++- share/mk/sys.mk | 4 +- usr.bin/make/main.c | 15 +++++ 6 files changed, 137 insertions(+), 13 deletions(-) diff --git a/etc/defaults/make.conf b/etc/defaults/make.conf index 9e9d66978c9c..41ad7bf38cda 100644 --- a/etc/defaults/make.conf +++ b/etc/defaults/make.conf @@ -13,6 +13,19 @@ # You have to find the things you can put here in the Makefiles and # documentation of the source tree. # +# +# MACHINE_CPU controls which processor-specific optimizations will be +# used by certain components of FreeBSD (currently only OpenSSL). 
+# This should be set to a list of your CPU type, plus all previous +# generations of the CPU architecture. The reason for using a list is +# because not all programs which use the MACHINE_CPU variable may have +# optimizations for your specific CPU generation (e.g. Pentium Pro), +# but may have optimizations for the previous generation (e.g. Pentium). +# Currently only the following CPU generations are used: +# i686 i586 i386 +# +#MACHINE_CPU=i686 i586 i386 +# # CFLAGS controls the compiler settings used when compiling C code. # Note that optimization settings above -O (-O2, ...) are not recommended # or supported for compiling the world or the kernel - please revert any @@ -82,7 +95,7 @@ BDECFLAGS= -W -Wall -ansi -pedantic -Wbad-function-cast -Wcast-align \ #NOGAMES= true # do not build games (games/ subdir) #NOINFO= true # do not make or install info files #NOLIBC_R= true # do not build libc_r (re-entrant version of libc) -#NOPERL= true # To avoid building perl +#NOPERL= true # do not build perl. 
Disables OpenSSL optimizations #NOPROFILE= true # Avoid compiling profiled libraries #NOSECURE= true # do not build crypto code in secure/ subdir #NOSHARE= true # do not go into the share subdir diff --git a/secure/lib/libcrypto/Makefile b/secure/lib/libcrypto/Makefile index 7fb60be60a8d..28109570b43d 100644 --- a/secure/lib/libcrypto/Makefile +++ b/secure/lib/libcrypto/Makefile @@ -16,6 +16,15 @@ ${LCRYPTO_SRC}/stack ${LCRYPTO_SRC}/txt_db ${LCRYPTO_SRC}/x509 \ ${LCRYPTO_SRC}/x509v3 +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +.PATH: ${LCRYPTO_SRC}/rc4/asm ${LCRYPTO_SRC}/rc5/asm \ + ${LCRYPTO_SRC}/des/asm ${LCRYPTO_SRC}/cast/asm \ + ${LCRYPTO_SRC}/sha/asm ${LCRYPTO_SRC}/bn/asm \ + ${LCRYPTO_SRC}/bf/asm ${LCRYPTO_SRC}/md5/asm \ + ${LCRYPTO_SRC}/ripemd/asm +PERLPATH= ${LCRYPTO_SRC}/des/asm:${LCRYPTO_SRC}/perlasm +.endif + .if defined(MAKE_IDEA) && ${MAKE_IDEA} == YES .PATH: ${LCRYPTO_SRC}/idea .endif @@ -48,7 +57,16 @@ SRCS+= a_bitstr.c a_bmp.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c \ x_req.c x_sig.c x_spki.c x_val.c x_x509.c x_x509a.c # blowfish -SRCS+= bf_cfb64.c bf_ecb.c bf_enc.c bf_ofb64.c bf_skey.c +SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +.if ${MACHINE_CPU:Mi686} +SRCS+= bf-686.pl +.else +SRCS+= bf-586.pl +.endif +.else +SRCS+= bf_enc.c +.endif # bio SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c \ @@ -57,16 +75,28 @@ SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c \ # bn -SRCS+= bn_add.c bn_asm.c bn_blind.c bn_ctx.c bn_div.c bn_err.c \ +SRCS+= bn_add.c bn_blind.c bn_ctx.c bn_div.c bn_err.c \ bn_exp.c bn_exp2.c bn_gcd.c bn_lib.c bn_mont.c bn_mpi.c \ bn_mul.c bn_prime.c bn_print.c bn_rand.c bn_recp.c bn_shift.c \ bn_sqr.c bn_word.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= bn-586.pl co-586.pl +.elif ${MACHINE_ARCH} == "alpha" +SRCS+= bn-alpha.pl co-alpha.pl +.else +SRCS+= bn_asm.c +.endif # buffer SRCS+= buf_err.c buffer.c # cast -SRCS+= 
c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c +SRCS+= c_cfb64.c c_ecb.c c_ofb64.c c_skey.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= cast-586.pl +.else +SRCS+= c_enc.c +.endif # comp SRCS+= c_rle.c c_zlib.c comp_lib.c @@ -75,11 +105,16 @@ SRCS+= c_rle.c c_zlib.c comp_lib.c SRCS+= conf_api.c conf_def.c conf_err.c conf_lib.c # des -SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c des_enc.c \ +SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \ ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c \ - fcrypt.c fcrypt_b.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c \ + fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c \ qud_cksm.c rand_key.c read2pwd.c read_pwd.c rpc_enc.c \ set_key.c str2key.c xcbc_enc.c rnd_keys.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= des-586.pl crypt586.pl +.else +SRCS+= des_enc.c +.endif # dh SRCS+= dh_check.c dh_err.c dh_gen.c dh_key.c dh_lib.c @@ -123,6 +158,9 @@ SRCS+= md4_dgst.c md4_one.c # md5 SRCS+= md5_dgst.c md5_one.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= md5-586.pl +.endif # mdc2 SRCS+= mdc2dgst.c mdc2_one.c @@ -148,13 +186,26 @@ SRCS+= md_rand.c rand_egd.c rand_err.c rand_lib.c rand_win.c randfile.c SRCS+= rc2_cbc.c rc2cfb64.c rc2_ecb.c rc2ofb64.c rc2_skey.c # rc4 -SRCS+= rc4_enc.c rc4_skey.c +SRCS+= rc4_skey.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= rc4-586.pl +.else +SRCS+= rc4_enc.c +.endif # rc5 -SRCS+= rc5cfb64.c rc5_ecb.c rc5_enc.c rc5ofb64.c rc5_skey.c +SRCS+= rc5cfb64.c rc5_ecb.c rc5ofb64.c rc5_skey.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= rc5-586.pl +.else +SRCS+= rc5_enc.c +.endif # ripemd SRCS+= rmd_dgst.c rmd_one.c +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +SRCS+= rmd-586.pl +.endif # rsa .if defined(WITH_RSA) && ${WITH_RSA} == YES @@ -164,6 +215,9 @@ SRCS+= rsa_chk.c rsa_eay.c rsa_err.c rsa_gen.c rsa_lib.c rsa_none.c \ # sha SRCS+= sha_dgst.c sha_one.c sha1_one.c sha1dgst.c +.if !defined(NOPERL) 
&& ${MACHINE_ARCH} == "i386" +SRCS+= sha1-586.pl +.endif # stack SRCS+= stack.c @@ -263,6 +317,30 @@ beforeinstall: openssl/opensslconf.h openssl/evp.h .include +# If we don't want 686/586 asm, use the "386" modifier to the perl scripts to give +# compatible output +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +.if ${MACHINE_CPU:Mi686} || ${MACHINE_CPU:Mi586} +CPUTYPE= +.elif ${MACHINE_CPU:Mi386} +CPUTYPE= 386 +.endif +.endif + +.if !defined(NOPERL) && ${MACHINE_ARCH} == "i386" +.SUFFIXES: .o .pl +.SUFFIXES: .po .pl +.SUFFIXES: .So .pl +.pl.o: + perl -I${PERLPATH} $(.ALLSRC) elf ${CPUTYPE} > $(.PREFIX).pl.s ; ${AS} ${AFLAGS} $(.PREFIX).pl.s -o $(.TARGET) + +.pl.po: + perl -I${PERLPATH} $(.ALLSRC) elf ${CPUTYPE} > $(.PREFIX).pl.s ; ${AS} ${AFLAGS} $(.PREFIX).pl.s -o $(.TARGET) + +.pl.So: + perl -I${PERLPATH} $(.ALLSRC) elf ${CPUTYPE} > $(.PREFIX).pl.s ; ${AS} ${AFLAGS} $(.PREFIX).pl.s -o $(.TARGET) +.endif + afterinstall: .if !defined(NOPIC) @cd ${DESTDIR}${SHLIBDIR}; \ diff --git a/secure/lib/libcrypto/Makefile.inc b/secure/lib/libcrypto/Makefile.inc index 075d2467cf6e..5f927eefb9e0 100644 --- a/secure/lib/libcrypto/Makefile.inc +++ b/secure/lib/libcrypto/Makefile.inc @@ -7,8 +7,10 @@ CFLAGS+= -DNO_IDEA .endif .if ${MACHINE_ARCH} == "i386" -CFLAGS+= -DL_ENDIAN -# -DBN_ASM -DSHA1_ASM -DMD5_ASM -DRMD160_ASM - XXX notyet +CFLAGS+= -DL_ENDIAN +.if !defined(NOPERL) +CFLAGS+= -DSHA1_ASM -DBN_ASM -DMD5_ASM -DRMD160_ASM +.endif .elif ${MACHINE_ARCH} == "alpha" # no ENDIAN stuff defined for alpha (64-bit) .endif @@ -23,7 +25,8 @@ HDRS= asn1/asn1.h asn1/asn1_mac.h bio/bio.h bf/blowfish.h bn/bn.h \ pem/pem2.h pkcs12/pkcs12.h pkcs7/pkcs7.h rand/rand.h rc2/rc2.h \ rc4/rc4.h rc5/rc5.h ripemd/ripemd.h rsa/rsa.h stack/safestack.h \ sha/sha.h stack/stack.h tmdiff.h txt_db/txt_db.h x509/x509.h \ - x509/x509_vfy.h x509v3/x509v3.h symhacks.h objects/obj_mac.h md4/md4.h dso/dso.h conf/conf_api.h + x509/x509_vfy.h x509v3/x509v3.h symhacks.h objects/obj_mac.h \ + md4/md4.h dso/dso.h 
conf/conf_api.h .if defined(MAKE_IDEA) && ${MAKE_IDEA} == YES HDRS+= idea/idea.h diff --git a/share/examples/etc/make.conf b/share/examples/etc/make.conf index 9e9d66978c9c..41ad7bf38cda 100644 --- a/share/examples/etc/make.conf +++ b/share/examples/etc/make.conf @@ -13,6 +13,19 @@ # You have to find the things you can put here in the Makefiles and # documentation of the source tree. # +# +# MACHINE_CPU controls which processor-specific optimizations will be +# used by certain components of FreeBSD (currently only OpenSSL). +# This should be set to a list of your CPU type, plus all previous +# generations of the CPU architecture. The reason for using a list is +# because not all programs which use the MACHINE_CPU variable may have +# optimizations for your specific CPU generation (e.g. Pentium Pro), +# but may have optimizations for the previous generation (e.g. Pentium). +# Currently only the following CPU generations are used: +# i686 i586 i386 +# +#MACHINE_CPU=i686 i586 i386 +# # CFLAGS controls the compiler settings used when compiling C code. # Note that optimization settings above -O (-O2, ...) are not recommended # or supported for compiling the world or the kernel - please revert any @@ -82,7 +95,7 @@ BDECFLAGS= -W -Wall -ansi -pedantic -Wbad-function-cast -Wcast-align \ #NOGAMES= true # do not build games (games/ subdir) #NOINFO= true # do not make or install info files #NOLIBC_R= true # do not build libc_r (re-entrant version of libc) -#NOPERL= true # To avoid building perl +#NOPERL= true # do not build perl. Disables OpenSSL optimizations #NOPROFILE= true # Avoid compiling profiled libraries #NOSECURE= true # do not build crypto code in secure/ subdir #NOSHARE= true # do not go into the share subdir diff --git a/share/mk/sys.mk b/share/mk/sys.mk index 1449d9937007..11d600291d02 100644 --- a/share/mk/sys.mk +++ b/share/mk/sys.mk @@ -101,8 +101,10 @@ YFLAGS ?= -d # which knows MACHINE, but not MACHINE_ARCH. 
When building on other # architectures, assume that the version of make being used has an # explicit MACHINE_ARCH setting and treat a missing MACHINE_ARCH -# as an i386 architecture. +# as an i386 architecture. Similarly for MACHINE_CPU, which indicates +# the specific CPU generation to use, for optimization purposes. MACHINE_ARCH ?= i386 +MACHINE_CPU ?= i386 # For tags rule. GTAGSFLAGS= -o diff --git a/usr.bin/make/main.c b/usr.bin/make/main.c index 4695066bb1fd..8ff1de368bbb 100644 --- a/usr.bin/make/main.c +++ b/usr.bin/make/main.c @@ -472,6 +472,7 @@ main(argc, argv) char cdpath[MAXPATHLEN + 1]; char *machine = getenv("MACHINE"); char *machine_arch = getenv("MACHINE_ARCH"); + char *machine_cpu = getenv("MACHINE_CPU"); Lst sysMkPath; /* Path of sys.mk */ char *cp = NULL, *start; /* avoid faults on read-only strings */ @@ -570,6 +571,19 @@ main(argc, argv) #endif } + /* + * Set machine_cpu to the minimum supported CPU revision based + * on the target architecture, if not already set. + */ + if (!machine_cpu) { + if (!strcmp(machine_arch, "i386")) + machine_cpu = "i386"; + else if (!strcmp(machine_arch, "alpha")) + machine_cpu = "ev4"; + else + machine_cpu = "unknown"; + } + /* * The object directory location is determined using the * following order of preference: @@ -672,6 +686,7 @@ main(argc, argv) Var_Set("MFLAGS", "", VAR_GLOBAL); Var_Set("MACHINE", machine, VAR_GLOBAL); Var_Set("MACHINE_ARCH", machine_arch, VAR_GLOBAL); + Var_Set("MACHINE_CPU", machine_cpu, VAR_GLOBAL); /* * First snag any flags out of the MAKE environment variable.